From 03a79346445f769561e56bdb6dd724d1a0c49b29 Mon Sep 17 00:00:00 2001
From: Pavel Krajcevski <pavel@cs.unc.edu>
Date: Mon, 26 Aug 2013 16:54:08 -0400
Subject: [PATCH] Get rid of evil tabs once and forever (from cpp/h files)

---
 BPTCEncoder/src/BC7CompressorSIMD.cpp | 1991 ++++++++++++-------------
 BPTCEncoder/src/BCLookupTables.h      | 1762 +++++++++++-----------
 BPTCEncoder/src/BitStream.h           |    2 +-
 BPTCEncoder/src/RGBAEndpoints.cpp     |  713 +++++----
 BPTCEncoder/src/RGBAEndpoints.h       |  526 +++----
 BPTCEncoder/src/RGBAEndpointsSIMD.cpp |  460 +++---
 BPTCEncoder/src/RGBAEndpointsSIMD.h   |  514 +++----
 CLTool/src/clwin32.cpp                |   46 +-
 Core/include/Image.h                  |    2 +-
 Core/src/BlockStats.cpp               |   10 +-
 Core/src/CompressedImage.cpp          |   14 +-
 Core/src/Image.cpp                    |   14 +-
 Core/src/StopWatch.h                  |   22 +-
 Core/src/StopWatchOSX.cpp             |    2 +-
 Core/src/StopWatchUnix.cpp            |    2 +-
 Core/src/TexComp.cpp                  |    2 +-
 Core/src/ThreadGroup.cpp              |    4 +-
 Core/src/WorkerQueue.cpp              |   39 +-
 IO/src/FileStreamWin32.cpp            |  131 +-
 IO/src/ImageFile.cpp                  |    4 +-
 IO/src/ImageLoader.cpp                |   76 +-
 IO/src/ImageLoaderPNG.cpp             |  114 +-
 IO/src/ImageWriterPNG.cpp             |  108 +-
 IO/src/ImageWriterPNG.h               |    4 +-
 24 files changed, 3303 insertions(+), 3259 deletions(-)

diff --git a/BPTCEncoder/src/BC7CompressorSIMD.cpp b/BPTCEncoder/src/BC7CompressorSIMD.cpp
index 6d7c30e..a5fe285 100755
--- a/BPTCEncoder/src/BC7CompressorSIMD.cpp
+++ b/BPTCEncoder/src/BC7CompressorSIMD.cpp
@@ -78,37 +78,37 @@
 
 static const uint32 kNumShapes2 = 64;
 static const uint16 kShapeMask2[kNumShapes2] = {
-	0xcccc, 0x8888, 0xeeee, 0xecc8, 0xc880, 0xfeec, 0xfec8, 0xec80,
-	0xc800, 0xffec, 0xfe80, 0xe800, 0xffe8, 0xff00, 0xfff0, 0xf000,
-	0xf710, 0x008e, 0x7100, 0x08ce, 0x008c, 0x7310, 0x3100, 0x8cce,
-	0x088c, 0x3110, 0x6666, 0x366c, 0x17e8, 0x0ff0, 0x718e, 0x399c,
-	0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, 0x3c3c, 0x55aa, 0x9696, 0xa55a,
-	0x73ce, 0x13c8, 0x324c, 0x3bdc, 0x6996, 0xc33c, 0x9966, 0x0660,
-	0x0272, 0x04e4, 0x4e40, 0x2720, 0xc936, 0x936c, 0x39c6, 0x639c,
-	0x9336, 0x9cc6, 0x817e, 0xe718, 0xccf0, 0x0fcc, 0x7744, 0xee22
+  0xcccc, 0x8888, 0xeeee, 0xecc8, 0xc880, 0xfeec, 0xfec8, 0xec80,
+  0xc800, 0xffec, 0xfe80, 0xe800, 0xffe8, 0xff00, 0xfff0, 0xf000,
+  0xf710, 0x008e, 0x7100, 0x08ce, 0x008c, 0x7310, 0x3100, 0x8cce,
+  0x088c, 0x3110, 0x6666, 0x366c, 0x17e8, 0x0ff0, 0x718e, 0x399c,
+  0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, 0x3c3c, 0x55aa, 0x9696, 0xa55a,
+  0x73ce, 0x13c8, 0x324c, 0x3bdc, 0x6996, 0xc33c, 0x9966, 0x0660,
+  0x0272, 0x04e4, 0x4e40, 0x2720, 0xc936, 0x936c, 0x39c6, 0x639c,
+  0x9336, 0x9cc6, 0x817e, 0xe718, 0xccf0, 0x0fcc, 0x7744, 0xee22
 };
 
 static const int kAnchorIdx2[kNumShapes2] = {
-	15,15,15,15,15,15,15,15,
-    15,15,15,15,15,15,15,15,
-    15, 2, 8, 2, 2, 8, 8,15,
-     2, 8, 2, 2, 8, 8, 2, 2,
-    15,15, 6, 8, 2, 8,15,15,
-     2, 8, 2, 2, 2,15,15, 6,
-     6, 2, 6, 8,15,15, 2, 2,
-    15,15,15,15,15, 2, 2, 15
+  15,15,15,15,15,15,15,15,
+  15,15,15,15,15,15,15,15,
+  15, 2, 8, 2, 2, 8, 8,15,
+  2, 8, 2, 2, 8, 8, 2, 2,
+  15,15, 6, 8, 2, 8,15,15,
+  2, 8, 2, 2, 2,15,15, 6,
+  6, 2, 6, 8,15,15, 2, 2,
+  15,15,15,15,15, 2, 2, 15
 };
 
 static const uint32 kNumShapes3 = 64;
 static const uint16 kShapeMask3[kNumShapes3][2] = {
-	{ 0xfecc, 0xf600 }, { 0xffc8, 0x7300 }, { 0xff90, 0x3310 }, { 0xecce, 0x00ce }, { 0xff00, 0xcc00 }, { 0xcccc, 0xcc00 }, { 0xffcc, 0x00cc }, { 0xffcc, 0x3300 },
-	{ 0xff00, 0xf000 }, { 0xfff0, 0xf000 }, { 0xfff0, 0xff00 }, { 0xcccc, 0x8888 }, { 0xeeee, 0x8888 }, { 0xeeee, 0xcccc }, { 0xffec, 0xec80 }, { 0x739c, 0x7310 },
-	{ 0xfec8, 0xc800 }, { 0x39ce, 0x3100 }, { 0xfff0, 0xccc0 }, { 0xfccc, 0x0ccc }, { 0xeeee, 0xee00 }, { 0xff88, 0x7700 }, { 0xeec0, 0xcc00 }, { 0x7730, 0x3300 },
-	{ 0x0cee, 0x00cc }, { 0xffcc, 0xfc88 }, { 0x6ff6, 0x0660 }, { 0xff60, 0x6600 }, { 0xcbbc, 0xc88c }, { 0xf966, 0xf900 }, { 0xceec, 0x0cc0 }, { 0xff10, 0x7310 },
-	{ 0xff80, 0xec80 }, { 0xccce, 0x08ce }, { 0xeccc, 0xec80 }, { 0x6666, 0x4444 }, { 0x0ff0, 0x0f00 }, { 0x6db6, 0x4924 }, { 0x6bd6, 0x4294 }, { 0xcf3c, 0x0c30 },
-	{ 0xc3fc, 0x03c0 }, { 0xffaa, 0xff00 }, { 0xff00, 0x5500 }, { 0xfcfc, 0xcccc }, { 0xcccc, 0x0c0c }, { 0xf6f6, 0x6666 }, { 0xaffa, 0x0ff0 }, { 0xfff0, 0x5550 },
-	{ 0xfaaa, 0xf000 }, { 0xeeee, 0x0e0e }, { 0xf8f8, 0x8888 }, { 0xfff0, 0x9990 }, { 0xeeee, 0xe00e }, { 0x8ff8, 0x8888 }, { 0xf666, 0xf000 }, { 0xff00, 0x9900 },
-	{ 0xff66, 0xff00 }, { 0xcccc, 0xc00c }, { 0xcffc, 0xcccc }, { 0xf000, 0x9000 }, { 0x8888, 0x0808 }, { 0xfefe, 0xeeee }, { 0xfffa, 0xfff0 }, { 0x7bde, 0x7310 }
+  { 0xfecc, 0xf600 }, { 0xffc8, 0x7300 }, { 0xff90, 0x3310 }, { 0xecce, 0x00ce }, { 0xff00, 0xcc00 }, { 0xcccc, 0xcc00 }, { 0xffcc, 0x00cc }, { 0xffcc, 0x3300 },
+  { 0xff00, 0xf000 }, { 0xfff0, 0xf000 }, { 0xfff0, 0xff00 }, { 0xcccc, 0x8888 }, { 0xeeee, 0x8888 }, { 0xeeee, 0xcccc }, { 0xffec, 0xec80 }, { 0x739c, 0x7310 },
+  { 0xfec8, 0xc800 }, { 0x39ce, 0x3100 }, { 0xfff0, 0xccc0 }, { 0xfccc, 0x0ccc }, { 0xeeee, 0xee00 }, { 0xff88, 0x7700 }, { 0xeec0, 0xcc00 }, { 0x7730, 0x3300 },
+  { 0x0cee, 0x00cc }, { 0xffcc, 0xfc88 }, { 0x6ff6, 0x0660 }, { 0xff60, 0x6600 }, { 0xcbbc, 0xc88c }, { 0xf966, 0xf900 }, { 0xceec, 0x0cc0 }, { 0xff10, 0x7310 },
+  { 0xff80, 0xec80 }, { 0xccce, 0x08ce }, { 0xeccc, 0xec80 }, { 0x6666, 0x4444 }, { 0x0ff0, 0x0f00 }, { 0x6db6, 0x4924 }, { 0x6bd6, 0x4294 }, { 0xcf3c, 0x0c30 },
+  { 0xc3fc, 0x03c0 }, { 0xffaa, 0xff00 }, { 0xff00, 0x5500 }, { 0xfcfc, 0xcccc }, { 0xcccc, 0x0c0c }, { 0xf6f6, 0x6666 }, { 0xaffa, 0x0ff0 }, { 0xfff0, 0x5550 },
+  { 0xfaaa, 0xf000 }, { 0xeeee, 0x0e0e }, { 0xf8f8, 0x8888 }, { 0xfff0, 0x9990 }, { 0xeeee, 0xe00e }, { 0x8ff8, 0x8888 }, { 0xf666, 0xf000 }, { 0xff00, 0x9900 },
+  { 0xff66, 0xff00 }, { 0xcccc, 0xc00c }, { 0xcffc, 0xcccc }, { 0xf000, 0x9000 }, { 0x8888, 0x0808 }, { 0xfefe, 0xeeee }, { 0xfffa, 0xfff0 }, { 0x7bde, 0x7310 }
 };
 
 static const uint32 kWMValues[] = { 0x32b92180, 0x32ba3080, 0x31103200, 0x28103c80, 0x32bb3080, 0x25903600, 0x3530b900, 0x3b32b180, 0x34b5b980 };
@@ -116,83 +116,83 @@ static const uint32 kNumWMVals = sizeof(kWMValues) / sizeof(kWMValues[0]);
 static uint32 gWMVal = -1;
 
 static const int kAnchorIdx3[2][kNumShapes3] = {
-	{ 3, 3,15,15, 8, 3,15,15,
-     8, 8, 6, 6, 6, 5, 3, 3,
-     3, 3, 8,15, 3, 3, 6,10,
-     5, 8, 8, 6, 8, 5,15,15,
-     8,15, 3, 5, 6,10, 8,15,
+  { 3, 3,15,15, 8, 3,15,15,
+    8, 8, 6, 6, 6, 5, 3, 3,
+    3, 3, 8,15, 3, 3, 6,10,
+    5, 8, 8, 6, 8, 5,15,15,
+    8,15, 3, 5, 6,10, 8,15,
     15, 3,15, 5,15,15,15,15,
-     3,15, 5, 5, 5, 8, 5,10,
-	 5,10, 8,13,15,12, 3, 3 },
+    3,15, 5, 5, 5, 8, 5,10,
+    5,10, 8,13,15,12, 3, 3 },
 
-	{ 15, 8, 8, 3,15,15, 3, 8,
+  { 15, 8, 8, 3,15,15, 3, 8,
     15,15,15,15,15,15,15, 8,
     15, 8,15, 3,15, 8,15, 8,
-     3,15, 6,10,15,15,10, 8,
+    3,15, 6,10,15,15,10, 8,
     15, 3,15,10,10, 8, 9,10,
-     6,15, 8,15, 3, 6, 6, 8,
+    6,15, 8,15, 3, 6, 6, 8,
     15, 3,15,15,15,15,15,15,
-	15,15,15,15, 3,15,15, 8 }
+    15,15,15,15, 3,15,15, 8 }
 };
 
 const uint32 kBC7InterpolationValuesScalar[4][16][2] = {
-	{ {64, 0}, {33, 31}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ {64, 0}, {43, 21}, {21, 43}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ {64, 0}, {55, 9}, {46, 18}, {37, 27}, {27, 37}, {18, 46}, {9, 55}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ {64, 0}, {60, 4}, {55, 9}, {51, 13}, {47, 17}, {43, 21}, {38, 26}, {34, 30}, {30, 34}, {26, 38}, {21, 43}, {17, 47}, {13, 51}, {9, 55}, {4, 60}, {0, 64} }
+  { {64, 0}, {33, 31}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  { {64, 0}, {43, 21}, {21, 43}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  { {64, 0}, {55, 9}, {46, 18}, {37, 27}, {27, 37}, {18, 46}, {9, 55}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0 },
+  { {64, 0}, {60, 4}, {55, 9}, {51, 13}, {47, 17}, {43, 21}, {38, 26}, {34, 30}, {30, 34}, {26, 38}, {21, 43}, {17, 47}, {13, 51}, {9, 55}, {4, 60}, {0, 64} }
 };
 
 static const ALIGN_SSE uint32 kZeroVector[4] = { 0, 0, 0, 0 };
 const __m128i kBC7InterpolationValuesSIMD[4][16][2] = {
-	{ 
-		{ _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, 
-		{ _mm_set1_epi32(33), _mm_set1_epi32(31) }, 
-		{ *((const __m128i *)kZeroVector), _mm_set1_epi32(64) }, 
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 
-	},
-	{ 
-		{ _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, 
-		{ _mm_set1_epi32(43), _mm_set1_epi32(21)}, 
-		{ _mm_set1_epi32(21), _mm_set1_epi32(43)}, 
-		{ *((const __m128i *)kZeroVector), _mm_set1_epi32(64)}, 
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 
-	},
-	{ 
-		{ _mm_set1_epi32(64), *((const __m128i *)kZeroVector) }, 
-		{ _mm_set1_epi32(55), _mm_set1_epi32(9) }, 
-		{ _mm_set1_epi32(46), _mm_set1_epi32(18)}, 
-		{ _mm_set1_epi32(37), _mm_set1_epi32(27)}, 
-		{ _mm_set1_epi32(27), _mm_set1_epi32(37)}, 
-		{ _mm_set1_epi32(18), _mm_set1_epi32(46)}, 
-		{ _mm_set1_epi32(9), _mm_set1_epi32(55)}, 
-		{ *((const __m128i *)kZeroVector), _mm_set1_epi32(64)}, 
-		0, 0, 0, 0, 0, 0, 0, 0 
-	},
-	{ 
-		{ _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, 
-		{ _mm_set1_epi32(60), _mm_set1_epi32(4)}, 
-		{ _mm_set1_epi32(55), _mm_set1_epi32(9)}, 
-		{ _mm_set1_epi32(51), _mm_set1_epi32(13)}, 
-		{ _mm_set1_epi32(47), _mm_set1_epi32(17)}, 
-		{ _mm_set1_epi32(43), _mm_set1_epi32(21)}, 
-		{ _mm_set1_epi32(38), _mm_set1_epi32(26)}, 
-		{ _mm_set1_epi32(34), _mm_set1_epi32(30)}, 
-		{ _mm_set1_epi32(30), _mm_set1_epi32(34)}, 
-		{ _mm_set1_epi32(26), _mm_set1_epi32(38)}, 
-		{ _mm_set1_epi32(21), _mm_set1_epi32(43)}, 
-		{ _mm_set1_epi32(17), _mm_set1_epi32(47)}, 
-		{ _mm_set1_epi32(13), _mm_set1_epi32(51)}, 
-		{ _mm_set1_epi32(9), _mm_set1_epi32(55)}, 
-		{ _mm_set1_epi32(4), _mm_set1_epi32(60)}, 
-		{ *((const __m128i *)kZeroVector), _mm_set1_epi32(64)} 
-	}
+  { 
+    { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, 
+    { _mm_set1_epi32(33), _mm_set1_epi32(31) }, 
+    { *((const __m128i *)kZeroVector), _mm_set1_epi32(64) }, 
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 
+  },
+  { 
+    { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, 
+    { _mm_set1_epi32(43), _mm_set1_epi32(21)}, 
+    { _mm_set1_epi32(21), _mm_set1_epi32(43)}, 
+    { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)}, 
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 
+  },
+  { 
+    { _mm_set1_epi32(64), *((const __m128i *)kZeroVector) }, 
+    { _mm_set1_epi32(55), _mm_set1_epi32(9) }, 
+    { _mm_set1_epi32(46), _mm_set1_epi32(18)}, 
+    { _mm_set1_epi32(37), _mm_set1_epi32(27)}, 
+    { _mm_set1_epi32(27), _mm_set1_epi32(37)}, 
+    { _mm_set1_epi32(18), _mm_set1_epi32(46)}, 
+    { _mm_set1_epi32(9), _mm_set1_epi32(55)}, 
+    { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)}, 
+    0, 0, 0, 0, 0, 0, 0, 0 
+  },
+  { 
+    { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, 
+    { _mm_set1_epi32(60), _mm_set1_epi32(4)}, 
+    { _mm_set1_epi32(55), _mm_set1_epi32(9)}, 
+    { _mm_set1_epi32(51), _mm_set1_epi32(13)}, 
+    { _mm_set1_epi32(47), _mm_set1_epi32(17)}, 
+    { _mm_set1_epi32(43), _mm_set1_epi32(21)}, 
+    { _mm_set1_epi32(38), _mm_set1_epi32(26)}, 
+    { _mm_set1_epi32(34), _mm_set1_epi32(30)}, 
+    { _mm_set1_epi32(30), _mm_set1_epi32(34)}, 
+    { _mm_set1_epi32(26), _mm_set1_epi32(38)}, 
+    { _mm_set1_epi32(21), _mm_set1_epi32(43)}, 
+    { _mm_set1_epi32(17), _mm_set1_epi32(47)}, 
+    { _mm_set1_epi32(13), _mm_set1_epi32(51)}, 
+    { _mm_set1_epi32(9), _mm_set1_epi32(55)}, 
+    { _mm_set1_epi32(4), _mm_set1_epi32(60)}, 
+    { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)} 
+  }
 };
 
 static const ALIGN_SSE uint32 kByteValMask[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
 static inline __m128i sad(const __m128i &a, const __m128i &b) {
-	const __m128i maxab = _mm_max_epu8(a, b);
-	const __m128i minab = _mm_min_epu8(a, b);
-	return _mm_and_si128( *((const __m128i *)kByteValMask), _mm_subs_epu8( maxab, minab ) );
+  const __m128i maxab = _mm_max_epu8(a, b);
+  const __m128i minab = _mm_min_epu8(a, b);
+  return _mm_and_si128( *((const __m128i *)kByteValMask), _mm_subs_epu8( maxab, minab ) );
 }
 
 #include <cstdio>
@@ -220,211 +220,211 @@ int BC7CompressionModeSIMD::MaxAnnealingIterations = 50; // This is a setting.
 int BC7CompressionModeSIMD::NumUses[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
 BC7CompressionModeSIMD::Attributes BC7CompressionModeSIMD::kModeAttributes[kNumModes] = {
-	{ 0, 4, 3, 3, 4, 4, 4, 0, BC7CompressionModeSIMD::ePBitType_NotShared },
-	{ 1, 6, 2, 3, 6, 6, 6, 0, BC7CompressionModeSIMD::ePBitType_Shared },
-	{ 2, 6, 3, 2, 5, 5, 5, 0, BC7CompressionModeSIMD::ePBitType_None },
-	{ 3, 6, 2, 2, 7, 7, 7, 0, BC7CompressionModeSIMD::ePBitType_NotShared },
-	{ 0 }, // Mode 4 not supported
-	{ 0 }, // Mode 5 not supported
-	{ 6, 0, 1, 4, 7, 7, 7, 7, BC7CompressionModeSIMD::ePBitType_NotShared },
-	{ 7, 6, 2, 2, 5, 5, 5, 5, BC7CompressionModeSIMD::ePBitType_NotShared },
+  { 0, 4, 3, 3, 4, 4, 4, 0, BC7CompressionModeSIMD::ePBitType_NotShared },
+  { 1, 6, 2, 3, 6, 6, 6, 0, BC7CompressionModeSIMD::ePBitType_Shared },
+  { 2, 6, 3, 2, 5, 5, 5, 0, BC7CompressionModeSIMD::ePBitType_None },
+  { 3, 6, 2, 2, 7, 7, 7, 0, BC7CompressionModeSIMD::ePBitType_NotShared },
+  { 0 }, // Mode 4 not supported
+  { 0 }, // Mode 5 not supported
+  { 6, 0, 1, 4, 7, 7, 7, 7, BC7CompressionModeSIMD::ePBitType_NotShared },
+  { 7, 6, 2, 2, 5, 5, 5, 5, BC7CompressionModeSIMD::ePBitType_NotShared },
 };
 
 void BC7CompressionModeSIMD::ClampEndpointsToGrid(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, int &bestPBitCombo) const {
-	const int nPbitCombos = GetNumPbitCombos();
-	const bool hasPbits = nPbitCombos > 1;
-	__m128i qmask;
-	GetQuantizationMask(qmask);
+  const int nPbitCombos = GetNumPbitCombos();
+  const bool hasPbits = nPbitCombos > 1;
+  __m128i qmask;
+  GetQuantizationMask(qmask);
 
-	ClampEndpoints(p1, p2);
+  ClampEndpoints(p1, p2);
 
-	// !SPEED! This can be faster. We're searching through all possible
-	// pBit combos to find the best one. Instead, we should be seeing what
-	// the pBit type is for this compression mode and finding the closest 
-	// quantization.
-	float minDist = FLT_MAX;
-	RGBAVectorSIMD bp1, bp2;
-	for(int i = 0; i < nPbitCombos; i++) {
+  // !SPEED! This can be faster. We're searching through all possible
+  // pBit combos to find the best one. Instead, we should be seeing what
+  // the pBit type is for this compression mode and finding the closest 
+  // quantization.
+  float minDist = FLT_MAX;
+  RGBAVectorSIMD bp1, bp2;
+  for(int i = 0; i < nPbitCombos; i++) {
 
-		__m128i qp1, qp2;
-		if(hasPbits) {
-			qp1 = p1.ToPixel(qmask, GetPBitCombo(i)[0]);
-			qp2 = p2.ToPixel(qmask, GetPBitCombo(i)[1]);
-		}
-		else {
-			qp1 = p1.ToPixel(qmask);
-			qp2 = p2.ToPixel(qmask);
-		}
+    __m128i qp1, qp2;
+    if(hasPbits) {
+      qp1 = p1.ToPixel(qmask, GetPBitCombo(i)[0]);
+      qp2 = p2.ToPixel(qmask, GetPBitCombo(i)[1]);
+    }
+    else {
+      qp1 = p1.ToPixel(qmask);
+      qp2 = p2.ToPixel(qmask);
+    }
 
-		RGBAVectorSIMD np1 = RGBAVectorSIMD( _mm_cvtepi32_ps( qp1 ) );
-		RGBAVectorSIMD np2 = RGBAVectorSIMD( _mm_cvtepi32_ps( qp2 ) );
+    RGBAVectorSIMD np1 = RGBAVectorSIMD( _mm_cvtepi32_ps( qp1 ) );
+    RGBAVectorSIMD np2 = RGBAVectorSIMD( _mm_cvtepi32_ps( qp2 ) );
 
-		RGBAVectorSIMD d1 = np1 - p1;
-		RGBAVectorSIMD d2 = np2 - p2;
-		float dist = (d1 * d1) + (d2 * d2);
-		if(dist < minDist) {
-			minDist = dist;
-			bp1 = np1; bp2 = np2;
-			bestPBitCombo = i;
-		}
-	}
+    RGBAVectorSIMD d1 = np1 - p1;
+    RGBAVectorSIMD d2 = np2 - p2;
+    float dist = (d1 * d1) + (d2 * d2);
+    if(dist < minDist) {
+      minDist = dist;
+      bp1 = np1; bp2 = np2;
+      bestPBitCombo = i;
+    }
+  }
 
-	p1 = bp1;
-	p2 = bp2;
+  p1 = bp1;
+  p2 = bp2;
 }
 
 int BC7CompressionModeSIMD::GetSubsetForIndex(int idx, const int shapeIdx) const {
-	int subset = 0;
-	
-	const int nSubsets = GetNumberOfSubsets();
-	switch(nSubsets) {
-		case 2:
-		{
-			subset = !!((1 << idx) & kShapeMask2[shapeIdx]);
-		}
-		break;
+  int subset = 0;
+    
+  const int nSubsets = GetNumberOfSubsets();
+  switch(nSubsets) {
+    case 2:
+    {
+      subset = !!((1 << idx) & kShapeMask2[shapeIdx]);
+    }
+    break;
 
-		case 3:
-		{
-			if(1 << idx & kShapeMask3[shapeIdx][0])
-				subset = 1 + !!((1 << idx) & kShapeMask3[shapeIdx][1]);
-			else
-				subset = 0;
-		}
-		break;
+    case 3:
+    {
+      if(1 << idx & kShapeMask3[shapeIdx][0])
+        subset = 1 + !!((1 << idx) & kShapeMask3[shapeIdx][1]);
+      else
+        subset = 0;
+    }
+    break;
 
-		default:
-		break;
-	}
+    default:
+    break;
+  }
 
-	return subset;
+  return subset;
 }
 
 int BC7CompressionModeSIMD::GetAnchorIndexForSubset(int subset, const int shapeIdx) const {
-	
-	const int nSubsets = GetNumberOfSubsets();
-	int anchorIdx = 0;
+    
+  const int nSubsets = GetNumberOfSubsets();
+  int anchorIdx = 0;
 
-	switch(subset) {
-		case 1:
-		{
-			if(nSubsets == 2) {
-				anchorIdx = kAnchorIdx2[shapeIdx];
-			}
-			else {
-				anchorIdx = kAnchorIdx3[0][shapeIdx];
-			}
-		}
-		break;
+  switch(subset) {
+    case 1:
+    {
+      if(nSubsets == 2) {
+        anchorIdx = kAnchorIdx2[shapeIdx];
+      }
+      else {
+        anchorIdx = kAnchorIdx3[0][shapeIdx];
+      }
+    }
+    break;
 
-		case 2:
-		{
-			assert(nSubsets == 3);
-			anchorIdx = kAnchorIdx3[1][shapeIdx];
-		}
-		break;
+    case 2:
+    {
+      assert(nSubsets == 3);
+      anchorIdx = kAnchorIdx3[1][shapeIdx];
+    }
+    break;
 
-		default:
-		break;
-	}
+    default:
+    break;
+  }
 
-	return anchorIdx;
+  return anchorIdx;
 }
 
 double BC7CompressionModeSIMD::CompressSingleColor(const RGBAVectorSIMD &p, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, int &bestPbitCombo) const {
 
-	// Our pixel to compress...
-	const __m128i pixel = p.ToPixel(*((const __m128i *)kByteValMask));
+  // Our pixel to compress...
+  const __m128i pixel = p.ToPixel(*((const __m128i *)kByteValMask));
 
-	uint32 bestDist = 0xFF;
-	bestPbitCombo = -1;
+  uint32 bestDist = 0xFF;
+  bestPbitCombo = -1;
 
-	for(int pbi = 0; pbi < GetNumPbitCombos(); pbi++) {
+  for(int pbi = 0; pbi < GetNumPbitCombos(); pbi++) {
 
-		const int *pbitCombo = GetPBitCombo(pbi);
-		
-		uint32 dist = 0x0;
-		uint32 bestValI[kNumColorChannels] = { -1, -1, -1, -1 };
-		uint32 bestValJ[kNumColorChannels] = { -1, -1, -1, -1 };
+    const int *pbitCombo = GetPBitCombo(pbi);
+        
+    uint32 dist = 0x0;
+    uint32 bestValI[kNumColorChannels] = { -1, -1, -1, -1 };
+    uint32 bestValJ[kNumColorChannels] = { -1, -1, -1, -1 };
 
-		for(int ci = 0; ci < kNumColorChannels; ci++) {
+    for(int ci = 0; ci < kNumColorChannels; ci++) {
 
-			const uint8 val = ((uint8 *)(&pixel))[4*ci];
-			int nBits = 0;
-			switch(ci) {
-				case 0: nBits = GetRedChannelPrecision(); break;
-				case 1: nBits = GetGreenChannelPrecision(); break;
-				case 2: nBits = GetBlueChannelPrecision(); break;
-				case 3: nBits = GetAlphaChannelPrecision(); break;
-			}
+      const uint8 val = ((uint8 *)(&pixel))[4*ci];
+      int nBits = 0;
+      switch(ci) {
+        case 0: nBits = GetRedChannelPrecision(); break;
+        case 1: nBits = GetGreenChannelPrecision(); break;
+        case 2: nBits = GetBlueChannelPrecision(); break;
+        case 3: nBits = GetAlphaChannelPrecision(); break;
+      }
 
-			// If we don't handle this channel, then we don't need to
-			// worry about how well we interpolate.
-			if(nBits == 0) { bestValI[ci] = bestValJ[ci] = 0xFF; continue; }
+      // If we don't handle this channel, then we don't need to
+      // worry about how well we interpolate.
+      if(nBits == 0) { bestValI[ci] = bestValJ[ci] = 0xFF; continue; }
 
-			const int nPossVals = (1 << nBits);
-			int possValsH[256];
-			int possValsL[256];
+      const int nPossVals = (1 << nBits);
+      int possValsH[256];
+      int possValsL[256];
 
-			// Do we have a pbit?
-			const bool havepbit = GetPBitType() != ePBitType_None;
-			if(havepbit)
-				nBits++;
+      // Do we have a pbit?
+      const bool havepbit = GetPBitType() != ePBitType_None;
+      if(havepbit)
+        nBits++;
 
-			for(int i = 0; i < nPossVals; i++) {
+      for(int i = 0; i < nPossVals; i++) {
 
-				int vh = i, vl = i;
-				if(havepbit) {
-					vh <<= 1;
-					vl <<= 1;
+        int vh = i, vl = i;
+        if(havepbit) {
+          vh <<= 1;
+          vl <<= 1;
 
-					vh |= pbitCombo[1];
-					vl |= pbitCombo[0];
-				}
+          vh |= pbitCombo[1];
+          vl |= pbitCombo[0];
+        }
 
-				possValsH[i] = (vh << (8 - nBits));
-				possValsH[i] |= (possValsH[i] >> nBits);
+        possValsH[i] = (vh << (8 - nBits));
+        possValsH[i] |= (possValsH[i] >> nBits);
 
-				possValsL[i] = (vl << (8 - nBits));
-				possValsL[i] |= (possValsL[i] >> nBits);
-			}
+        possValsL[i] = (vl << (8 - nBits));
+        possValsL[i] |= (possValsL[i] >> nBits);
+      }
 
-			const uint32 interpVal0 = kBC7InterpolationValuesScalar[GetNumberOfBitsPerIndex() - 1][1][0];
-			const uint32 interpVal1 = kBC7InterpolationValuesScalar[GetNumberOfBitsPerIndex() - 1][1][1];
+      const uint32 interpVal0 = kBC7InterpolationValuesScalar[GetNumberOfBitsPerIndex() - 1][1][0];
+      const uint32 interpVal1 = kBC7InterpolationValuesScalar[GetNumberOfBitsPerIndex() - 1][1][1];
 
-			// Find the closest interpolated val that to the given val...
-			uint32 bestChannelDist = 0xFF;
-			for(int i = 0; bestChannelDist > 0 && i < nPossVals; i++)
-			for(int j = 0; bestChannelDist > 0 && j < nPossVals; j++) {
+      // Find the closest interpolated val that to the given val...
+      uint32 bestChannelDist = 0xFF;
+      for(int i = 0; bestChannelDist > 0 && i < nPossVals; i++)
+      for(int j = 0; bestChannelDist > 0 && j < nPossVals; j++) {
 
-				const uint32 v1 = possValsL[i];
-				const uint32 v2 = possValsH[j];
+        const uint32 v1 = possValsL[i];
+        const uint32 v2 = possValsH[j];
 
-				const uint32 combo = (interpVal0*v1 + (interpVal1 * v2) + 32) >> 6;
-				const uint32 err = (combo > val)? combo - val : val - combo;
+        const uint32 combo = (interpVal0*v1 + (interpVal1 * v2) + 32) >> 6;
+        const uint32 err = (combo > val)? combo - val : val - combo;
 
-				if(err < bestChannelDist) {
-					bestChannelDist = err;
-					bestValI[ci] = v1;
-					bestValJ[ci] = v2;
-				}
-			}
+        if(err < bestChannelDist) {
+          bestChannelDist = err;
+          bestValI[ci] = v1;
+          bestValJ[ci] = v2;
+        }
+      }
 
-			dist = max(bestChannelDist, dist);
-		}
+      dist = max(bestChannelDist, dist);
+    }
 
-		if(dist < bestDist) {
-			bestDist = dist;
-			bestPbitCombo = pbi;
+    if(dist < bestDist) {
+      bestDist = dist;
+      bestPbitCombo = pbi;
 
-			for(int ci = 0; ci < kNumColorChannels; ci++) {
-				p1.c[ci] = float(bestValI[ci]);
-				p2.c[ci] = float(bestValJ[ci]);
-			}
-		}
-	}
+      for(int ci = 0; ci < kNumColorChannels; ci++) {
+        p1.c[ci] = float(bestValI[ci]);
+        p2.c[ci] = float(bestValJ[ci]);
+      }
+    }
+  }
 
-	return bestDist;
+  return bestDist;
 }
 
 static const ALIGN_SSE uint32 kOneVec[4] = { 1, 1, 1, 1 };
@@ -433,26 +433,26 @@ static const ALIGN_SSE uint32 kOneVec[4] = { 1, 1, 1, 1 };
 // http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
 static uint32 g_seed = uint32(time(NULL));
 static inline uint32 fastrand() { 
-	g_seed = (214013 * g_seed + 2531011); 
-	return (g_seed>>16) & RAND_MAX; 
+  g_seed = (214013 * g_seed + 2531011); 
+  return (g_seed>>16) & RAND_MAX; 
 } 
 
-static __m128i cur_seed = _mm_set1_epi32( int(time(NULL)) ); 	 
+static __m128i cur_seed = _mm_set1_epi32( int(time(NULL)) );     
 static inline __m128i rand_dir()
 {
-	// static const __m128i mult = _mm_set_epi32( 214013, 17405, 214013, 69069 ); 
-	// static const __m128i gadd = _mm_set_epi32( 2531011, 10395331, 13737667, 1 ); 
-	static const ALIGN_SSE uint32 mult[4] = { 214013, 17405, 214013, 0 }; 
-	static const ALIGN_SSE uint32 gadd[4] = { 2531011, 10395331, 13737667, 0 }; 
-	static const ALIGN_SSE uint32 masklo[4] = { RAND_MAX, RAND_MAX, RAND_MAX, RAND_MAX };
-	
-	cur_seed = _mm_mullo_epi32( *((const __m128i *)mult), cur_seed );
-	cur_seed = _mm_add_epi32( *((const __m128i *)gadd), cur_seed );
+  // static const __m128i mult = _mm_set_epi32( 214013, 17405, 214013, 69069 ); 
+  // static const __m128i gadd = _mm_set_epi32( 2531011, 10395331, 13737667, 1 ); 
+  static const ALIGN_SSE uint32 mult[4] = { 214013, 17405, 214013, 0 }; 
+  static const ALIGN_SSE uint32 gadd[4] = { 2531011, 10395331, 13737667, 0 }; 
+  static const ALIGN_SSE uint32 masklo[4] = { RAND_MAX, RAND_MAX, RAND_MAX, RAND_MAX };
+    
+  cur_seed = _mm_mullo_epi32( *((const __m128i *)mult), cur_seed );
+  cur_seed = _mm_add_epi32( *((const __m128i *)gadd), cur_seed );
 
-	const __m128i resShift = _mm_srai_epi32( cur_seed, 16 );
-	const __m128i result = _mm_and_si128( resShift, *((const __m128i *)kOneVec) );
+  const __m128i resShift = _mm_srai_epi32( cur_seed, 16 );
+  const __m128i result = _mm_and_si128( resShift, *((const __m128i *)kOneVec) );
 
-	return result;
+  return result;
 } 
 
 // Fast generation of floats between 0 and 1. It generates a float
@@ -463,18 +463,18 @@ static inline __m128i rand_dir()
 
 #define COMPILE_ASSERT(x) extern int __compile_assert_[(int)(x)];
 COMPILE_ASSERT(RAND_MAX == 0x7FFF)
-	 
+     
 static inline float frand() { 
-	const uint16 r = fastrand();
-	
-	// RAND_MAX is 0x7FFF, which offers 15 bits
-	// of precision. Therefore, we move the bits
-	// into the top of the 23 bit mantissa, and 
-	// repeat the most significant bits of r in 
-	// the least significant of the mantissa
-	const uint32 m = (r << 8) | (r >> 7);
-	const uint32 flt = (127 << 23) | m;
-	return *(reinterpret_cast<const float *>(&flt)) - 1.0f;
+  const uint16 r = fastrand();
+    
+  // RAND_MAX is 0x7FFF, which offers 15 bits
+  // of precision. Therefore, we move the bits
+  // into the top of the 23 bit mantissa, and 
+  // repeat the most significant bits of r in 
+  // the least significant of the mantissa
+  const uint32 m = (r << 8) | (r >> 7);
+  const uint32 flt = (127 << 23) | m;
+  return *(reinterpret_cast<const float *>(&flt)) - 1.0f;
 }
 
 static const ALIGN_SSE uint32 kSevenVec[4] = { 7, 7, 7, 7 };
@@ -482,827 +482,824 @@ static const ALIGN_SSE uint32 kNegOneVec[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF
 static const ALIGN_SSE uint32 kFloatSignBit[4] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 };
 
 static void ChangePointForDirWithoutPbitChange(RGBAVectorSIMD &v, const __m128 &stepVec) {
-	
-	const __m128i dirBool = rand_dir();
-	const __m128i cmp = _mm_cmpeq_epi32( dirBool, *((const __m128i *)kZeroVector) );
+    
+  const __m128i dirBool = rand_dir();
+  const __m128i cmp = _mm_cmpeq_epi32( dirBool, *((const __m128i *)kZeroVector) );
 
-	const __m128 negStepVec = _mm_sub_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec );
-	const __m128 step = _mm_blendv_ps( negStepVec, stepVec, _mm_castsi128_ps( cmp ) );
-	v.vec = _mm_add_ps( v.vec, step );
+  const __m128 negStepVec = _mm_sub_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec );
+  const __m128 step = _mm_blendv_ps( negStepVec, stepVec, _mm_castsi128_ps( cmp ) );
+  v.vec = _mm_add_ps( v.vec, step );
 }
 
 static void ChangePointForDirWithPbitChange(RGBAVectorSIMD &v, int oldPbit, const __m128 &stepVec) {
 
-	const __m128i pBitVec = _mm_set1_epi32( oldPbit );
-	const __m128i cmpPBit = _mm_cmpeq_epi32( pBitVec, *((const __m128i *)kZeroVector) );
-	const __m128i notCmpPBit = _mm_xor_si128( cmpPBit, *((const __m128i *)kNegOneVec) );
+  const __m128i pBitVec = _mm_set1_epi32( oldPbit );
+  const __m128i cmpPBit = _mm_cmpeq_epi32( pBitVec, *((const __m128i *)kZeroVector) );
+  const __m128i notCmpPBit = _mm_xor_si128( cmpPBit, *((const __m128i *)kNegOneVec) );
 
-	const __m128i dirBool = rand_dir();
-	const __m128i cmpDir = _mm_cmpeq_epi32( dirBool, *((const __m128i *)kOneVec) );
-	const __m128i notCmpDir = _mm_xor_si128( cmpDir, *((const __m128i *)kNegOneVec) );
-	
-	const __m128i shouldDec = _mm_and_si128( cmpDir, cmpPBit );
-	const __m128i shouldInc = _mm_and_si128( notCmpDir, notCmpPBit );
+  const __m128i dirBool = rand_dir();
+  const __m128i cmpDir = _mm_cmpeq_epi32( dirBool, *((const __m128i *)kOneVec) );
+  const __m128i notCmpDir = _mm_xor_si128( cmpDir, *((const __m128i *)kNegOneVec) );
+    
+  const __m128i shouldDec = _mm_and_si128( cmpDir, cmpPBit );
+  const __m128i shouldInc = _mm_and_si128( notCmpDir, notCmpPBit );
 
-	const __m128 decStep = _mm_blendv_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec, _mm_castsi128_ps( shouldDec ) );
-	v.vec = _mm_sub_ps( v.vec, decStep );
+  const __m128 decStep = _mm_blendv_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec, _mm_castsi128_ps( shouldDec ) );
+  v.vec = _mm_sub_ps( v.vec, decStep );
 
-	const __m128 incStep = _mm_blendv_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec, _mm_castsi128_ps( shouldInc ) );
-	v.vec = _mm_add_ps( v.vec, incStep );
+  const __m128 incStep = _mm_blendv_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec, _mm_castsi128_ps( shouldInc ) );
+  v.vec = _mm_add_ps( v.vec, incStep );
 }
 
 void BC7CompressionModeSIMD::PickBestNeighboringEndpoints(const RGBAClusterSIMD &cluster, const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const int curPbitCombo, RGBAVectorSIMD &np1, RGBAVectorSIMD &np2, int &nPbitCombo, const __m128 &stepVec) const {
 
-	np1 = p1;
-	np2 = p2;
+  np1 = p1;
+  np2 = p2;
 
-	// First, let's figure out the new pbit combo... if there's no pbit then we don't need
-	// to worry about it.
-	const EPBitType pBitType = GetPBitType();
-	if(pBitType != ePBitType_None) {
+  // First, let's figure out the new pbit combo... if there's no pbit then we don't need
+  // to worry about it.
+  const EPBitType pBitType = GetPBitType();
+  if(pBitType != ePBitType_None) {
 
-		// If there is a pbit, then we must change it, because those will provide the closest values
-		// to the current point.
-		if(pBitType == ePBitType_Shared)
-			nPbitCombo = (curPbitCombo + 1) % 2;
-		else {
-			// Not shared... p1 needs to change and p2 needs to change... which means that 
-			// combo 0 gets rotated to combo 3, combo 1 gets rotated to combo 2 and vice
-			// versa...
-			nPbitCombo = 3 - curPbitCombo;
-		}
+    // If there is a pbit, then we must change it, because those will provide the closest values
+    // to the current point.
+    if(pBitType == ePBitType_Shared)
+      nPbitCombo = (curPbitCombo + 1) % 2;
+    else {
+      // Not shared... p1 needs to change and p2 needs to change... which means that 
+      // combo 0 gets rotated to combo 3, combo 1 gets rotated to combo 2 and vice
+      // versa...
+      nPbitCombo = 3 - curPbitCombo;
+    }
 
-		assert(GetPBitCombo(curPbitCombo)[0] + GetPBitCombo(nPbitCombo)[0] == 1);
-		assert(GetPBitCombo(curPbitCombo)[1] + GetPBitCombo(nPbitCombo)[1] == 1);
+    assert(GetPBitCombo(curPbitCombo)[0] + GetPBitCombo(nPbitCombo)[0] == 1);
+    assert(GetPBitCombo(curPbitCombo)[1] + GetPBitCombo(nPbitCombo)[1] == 1);
 
-		const int *pBitCombo = GetPBitCombo(curPbitCombo);
-		ChangePointForDirWithPbitChange(np1, pBitCombo[0], stepVec);
-		ChangePointForDirWithPbitChange(np2, pBitCombo[1], stepVec);
-	}
-	else {
-		ChangePointForDirWithoutPbitChange(np1, stepVec);
-		ChangePointForDirWithoutPbitChange(np2, stepVec);
-	}
+    const int *pBitCombo = GetPBitCombo(curPbitCombo);
+    ChangePointForDirWithPbitChange(np1, pBitCombo[0], stepVec);
+    ChangePointForDirWithPbitChange(np2, pBitCombo[1], stepVec);
+  }
+  else {
+    ChangePointForDirWithoutPbitChange(np1, stepVec);
+    ChangePointForDirWithoutPbitChange(np2, stepVec);
+  }
 
-	ClampEndpoints(np1, np2);
+  ClampEndpoints(np1, np2);
 }
 
 bool BC7CompressionModeSIMD::AcceptNewEndpointError(float newError, float oldError, float temp) const {
 
-	const float p = exp((0.15f * (oldError - newError)) / temp);
-	// const double r = (double(rand()) / double(RAND_MAX));
-	const float r = frand();
+  const float p = exp((0.15f * (oldError - newError)) / temp);
+  // const double r = (double(rand()) / double(RAND_MAX));
+  const float r = frand();
 
-	return r < p;
+  return r < p;
 }
 
 double BC7CompressionModeSIMD::OptimizeEndpointsForCluster(const RGBAClusterSIMD &cluster, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, __m128i *bestIndices, int &bestPbitCombo) const {
-	
-	const int nBuckets = (1 << GetNumberOfBitsPerIndex());
-	const int nPbitCombos = GetNumPbitCombos();
-	__m128i qmask;
-	GetQuantizationMask(qmask);
+    
+  const int nBuckets = (1 << GetNumberOfBitsPerIndex());
+  const int nPbitCombos = GetNumPbitCombos();
+  __m128i qmask;
+  GetQuantizationMask(qmask);
 
-	// Here we use simulated annealing to traverse the space of clusters to find the best possible endpoints.
-	float curError = cluster.QuantizedError(p1, p2, nBuckets, qmask, GetPBitCombo(bestPbitCombo), bestIndices);
-	int curPbitCombo = bestPbitCombo;
-	float bestError = curError;
-	RGBAVectorSIMD bp1 = p1, bp2 = p2;
+  // Here we use simulated annealing to traverse the space of clusters to find the best possible endpoints.
+  float curError = cluster.QuantizedError(p1, p2, nBuckets, qmask, GetPBitCombo(bestPbitCombo), bestIndices);
+  int curPbitCombo = bestPbitCombo;
+  float bestError = curError;
+  RGBAVectorSIMD bp1 = p1, bp2 = p2;
 
-	assert(curError == cluster.QuantizedError(p1, p2, nBuckets, qmask, GetPBitCombo(bestPbitCombo)));
+  assert(curError == cluster.QuantizedError(p1, p2, nBuckets, qmask, GetPBitCombo(bestPbitCombo)));
 
-	__m128i precVec = _mm_setr_epi32( GetRedChannelPrecision(), GetGreenChannelPrecision(), GetBlueChannelPrecision(), GetAlphaChannelPrecision() );
-	const __m128i precMask = _mm_xor_si128( _mm_cmpeq_epi32( precVec, *((const __m128i *)kZeroVector) ), *((const __m128i *)kNegOneVec) );
-	precVec = _mm_sub_epi32( *((const __m128i *)kSevenVec), precVec );
-	precVec = _mm_slli_epi32( precVec, 23 );
-	precVec = _mm_or_si128( precVec, *((const __m128i *)kFloatSignBit) );
-	
-	//__m128 stepSzVec = _mm_set1_ps(1.0f);
-	//__m128 stepVec = _mm_mul_ps( stepSzVec, _mm_castsi128_ps( _mm_and_si128( precMask, precVec ) ) );
-	__m128 stepVec = _mm_castsi128_ps( _mm_and_si128( precMask, precVec ) );
+  __m128i precVec = _mm_setr_epi32( GetRedChannelPrecision(), GetGreenChannelPrecision(), GetBlueChannelPrecision(), GetAlphaChannelPrecision() );
+  const __m128i precMask = _mm_xor_si128( _mm_cmpeq_epi32( precVec, *((const __m128i *)kZeroVector) ), *((const __m128i *)kNegOneVec) );
+  precVec = _mm_sub_epi32( *((const __m128i *)kSevenVec), precVec );
+  precVec = _mm_slli_epi32( precVec, 23 );
+  precVec = _mm_or_si128( precVec, *((const __m128i *)kFloatSignBit) );
+    
+  //__m128 stepSzVec = _mm_set1_ps(1.0f);
+  //__m128 stepVec = _mm_mul_ps( stepSzVec, _mm_castsi128_ps( _mm_and_si128( precMask, precVec ) ) );
+  __m128 stepVec = _mm_castsi128_ps( _mm_and_si128( precMask, precVec ) );
 
-	const int maxEnergy = MaxAnnealingIterations;
-	for(int energy = 0; bestError > 0 && energy < maxEnergy; energy++) {
+  const int maxEnergy = MaxAnnealingIterations;
+  for(int energy = 0; bestError > 0 && energy < maxEnergy; energy++) {
 
-		float temp = float(energy) / float(maxEnergy-1);
+    float temp = float(energy) / float(maxEnergy-1);
 
-		__m128i indices[kMaxNumDataPoints/4];
-		RGBAVectorSIMD np1, np2;
-		int nPbitCombo;
+    __m128i indices[kMaxNumDataPoints/4];
+    RGBAVectorSIMD np1, np2;
+    int nPbitCombo;
 
-		PickBestNeighboringEndpoints(cluster, p1, p2, curPbitCombo, np1, np2, nPbitCombo, stepVec);
+    PickBestNeighboringEndpoints(cluster, p1, p2, curPbitCombo, np1, np2, nPbitCombo, stepVec);
 
-		float error = cluster.QuantizedError(np1, np2, nBuckets, qmask, GetPBitCombo(nPbitCombo), indices);
-		if(AcceptNewEndpointError(error, curError, temp)) {
-			curError = error;
-			p1 = np1;
-			p2 = np2;
-			curPbitCombo = nPbitCombo;
-		}
+    float error = cluster.QuantizedError(np1, np2, nBuckets, qmask, GetPBitCombo(nPbitCombo), indices);
+    if(AcceptNewEndpointError(error, curError, temp)) {
+      curError = error;
+      p1 = np1;
+      p2 = np2;
+      curPbitCombo = nPbitCombo;
+    }
 
-		if(error < bestError) {
-			memcpy(bestIndices, indices, sizeof(indices));
-			bp1 = np1;
-			bp2 = np2;
-			bestPbitCombo = nPbitCombo;
-			bestError = error;
+    if(error < bestError) {
+      memcpy(bestIndices, indices, sizeof(indices));
+      bp1 = np1;
+      bp2 = np2;
+      bestPbitCombo = nPbitCombo;
+      bestError = error;
 
-			// Restart...
-			energy = 0;
-		}
-	}
+      // Restart...
+      energy = 0;
+    }
+  }
 
-	p1 = bp1;
-	p2 = bp2;
+  p1 = bp1;
+  p2 = bp2;
 
-	return bestError;
+  return bestError;
 }
 
 double BC7CompressionModeSIMD::CompressCluster(const RGBAClusterSIMD &cluster, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, __m128i *bestIndices, int &bestPbitCombo) const {
-		
-	// If all the points are the same in the cluster, then we need to figure out what the best
-	// approximation to this point is....
-	if(cluster.AllSamePoint()) {
-		const RGBAVectorSIMD &p = cluster.GetPoint(0);
-		double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo);
+        
+  // If all the points are the same in the cluster, then we need to figure out what the best
+  // approximation to this point is....
+  if(cluster.AllSamePoint()) {
+    const RGBAVectorSIMD &p = cluster.GetPoint(0);
+    double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo);
 
-		// We're assuming all indices will be index 1...
-		for(int i = 0; i < 4; i++) {
-			bestIndices[i] = _mm_set1_epi32(1);
-		}
-		
-		return bestErr;
-	}
-	
-	const int nBuckets = (1 << GetNumberOfBitsPerIndex());
-	const int nPbitCombos = GetNumPbitCombos();
+    // We're assuming all indices will be index 1...
+    for(int i = 0; i < 4; i++) {
+      bestIndices[i] = _mm_set1_epi32(1);
+    }
+        
+    return bestErr;
+  }
+    
+  const int nBuckets = (1 << GetNumberOfBitsPerIndex());
+  const int nPbitCombos = GetNumPbitCombos();
 
-	RGBAVectorSIMD avg = cluster.GetTotal() / float(cluster.GetNumPoints());
-	RGBADirSIMD axis;
-	::GetPrincipalAxis(cluster, axis);
+  RGBAVectorSIMD avg = cluster.GetTotal() / float(cluster.GetNumPoints());
+  RGBADirSIMD axis;
+  ::GetPrincipalAxis(cluster, axis);
 
-	float mindp = FLT_MAX, maxdp = -FLT_MAX;
-	for(int i = 0 ; i < cluster.GetNumPoints(); i++) {
-		float dp = (cluster.GetPoint(i) - avg) * axis;
-		if(dp < mindp) mindp = dp;
-		if(dp > maxdp) maxdp = dp;
-	}
+  float mindp = FLT_MAX, maxdp = -FLT_MAX;
+  for(int i = 0 ; i < cluster.GetNumPoints(); i++) {
+    float dp = (cluster.GetPoint(i) - avg) * axis;
+    if(dp < mindp) mindp = dp;
+    if(dp > maxdp) maxdp = dp;
+  }
 
-	RGBAVectorSIMD pts[1 << 4]; // At most 4 bits per index.
-	float numPts[1<<4];
-	assert(nBuckets <= 1 << 4);
-	
-	p1 = avg + mindp * axis;
-	p2 = avg + maxdp * axis;
+  RGBAVectorSIMD pts[1 << 4]; // At most 4 bits per index.
+  float numPts[1<<4];
+  assert(nBuckets <= 1 << 4);
+    
+  p1 = avg + mindp * axis;
+  p2 = avg + maxdp * axis;
 
-	ClampEndpoints(p1, p2);
+  ClampEndpoints(p1, p2);
 
-	for(int i = 0; i < nBuckets; i++) {
-		float s = (float(i) / float(nBuckets - 1));
-		pts[i] = (1.0f - s) * p1 + s * p2;
-	}
+  for(int i = 0; i < nBuckets; i++) {
+    float s = (float(i) / float(nBuckets - 1));
+    pts[i] = (1.0f - s) * p1 + s * p2;
+  }
 
-	assert(pts[0] == p1);
-	assert(pts[nBuckets - 1] == p2);
+  assert(pts[0] == p1);
+  assert(pts[nBuckets - 1] == p2);
 
-	// Do k-means clustering...
-	int bucketIdx[kMaxNumDataPoints];
+  // Do k-means clustering...
+  int bucketIdx[kMaxNumDataPoints];
 
-	bool fixed = false;
-	while(!fixed) {
-		
-		RGBAVectorSIMD newPts[1 << 4];
+  bool fixed = false;
+  while(!fixed) {
+        
+    RGBAVectorSIMD newPts[1 << 4];
 
-		// Assign each of the existing points to one of the buckets...
-		for(int i = 0; i < cluster.GetNumPoints(); i++) {
+    // Assign each of the existing points to one of the buckets...
+    for(int i = 0; i < cluster.GetNumPoints(); i++) {
 
-			int minBucket = -1;
-			float minDist = FLT_MAX;
-			for(int j = 0; j < nBuckets; j++) {
-				RGBAVectorSIMD v = cluster.GetPoint(i) - pts[j];
-				float distSq = v * v;
-				if(distSq < minDist)
-				{
-					minDist = distSq;
-					minBucket = j;
-				}
-			}
+      int minBucket = -1;
+      float minDist = FLT_MAX;
+      for(int j = 0; j < nBuckets; j++) {
+        RGBAVectorSIMD v = cluster.GetPoint(i) - pts[j];
+        float distSq = v * v;
+        if(distSq < minDist)
+          {
+            minDist = distSq;
+            minBucket = j;
+          }
+      }
 
-			assert(minBucket >= 0);
-			bucketIdx[i] = minBucket;
-		}
+      assert(minBucket >= 0);
+      bucketIdx[i] = minBucket;
+    }
 
-		// Calculate new buckets based on centroids of clusters...
-		for(int i = 0; i < nBuckets; i++) {
-			
-			numPts[i] = 0.0f;
-			newPts[i] = RGBAVectorSIMD(0.0f);
-			for(int j = 0; j < cluster.GetNumPoints(); j++) {
-				if(bucketIdx[j] == i) {
-					numPts[i] += 1.0f;
-					newPts[i] += cluster.GetPoint(j);
-				}
-			}
+    // Calculate new buckets based on centroids of clusters...
+    for(int i = 0; i < nBuckets; i++) {
+            
+      numPts[i] = 0.0f;
+      newPts[i] = RGBAVectorSIMD(0.0f);
+      for(int j = 0; j < cluster.GetNumPoints(); j++) {
+        if(bucketIdx[j] == i) {
+          numPts[i] += 1.0f;
+          newPts[i] += cluster.GetPoint(j);
+        }
+      }
 
-			// If there are no points in this cluster, then it should
-			// remain the same as last time and avoid a divide by zero.
-			if(0.0f != numPts[i])
-				newPts[i] /= numPts[i];
-		}
+      // If there are no points in this cluster, then it should
+      // remain the same as last time and avoid a divide by zero.
+      if(0.0f != numPts[i])
+        newPts[i] /= numPts[i];
+    }
 
-		// If we haven't changed, then we're done.
-		fixed = true;
-		for(int i = 0; i < nBuckets; i++) {
-			if(pts[i] != newPts[i])
-				fixed = false;
-		}
+    // If we haven't changed, then we're done.
+    fixed = true;
+    for(int i = 0; i < nBuckets; i++) {
+      if(pts[i] != newPts[i])
+        fixed = false;
+    }
 
-		// Assign the new points to be the old points.
-		for(int i = 0; i < nBuckets; i++) {
-			pts[i] = newPts[i];
-		}
-	}
+    // Assign the new points to be the old points.
+    for(int i = 0; i < nBuckets; i++) {
+      pts[i] = newPts[i];
+    }
+  }
 
-	// If there's only one bucket filled, then just compress for that single color...
-	int numBucketsFilled = 0, lastFilledBucket = -1;
-	for(int i = 0; i < nBuckets; i++) {
-		if(numPts[i] > 0.0f) {
-			numBucketsFilled++;
-			lastFilledBucket = i;
-		}
-	}
+  // If there's only one bucket filled, then just compress for that single color...
+  int numBucketsFilled = 0, lastFilledBucket = -1;
+  for(int i = 0; i < nBuckets; i++) {
+    if(numPts[i] > 0.0f) {
+      numBucketsFilled++;
+      lastFilledBucket = i;
+    }
+  }
 
-	assert(numBucketsFilled > 0);
-	if(1 == numBucketsFilled) {
-		const RGBAVectorSIMD &p = pts[lastFilledBucket];
-		double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo);
+  assert(numBucketsFilled > 0);
+  if(1 == numBucketsFilled) {
+    const RGBAVectorSIMD &p = pts[lastFilledBucket];
+    double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo);
 
-		// We're assuming all indices will be index 1...
-		for(int i = 0; i < 4; i++) {
-			bestIndices[i] = _mm_set1_epi32(1);
-		}
-		  
-		return bestErr;
-	}
+    // We're assuming all indices will be index 1...
+    for(int i = 0; i < 4; i++) {
+      bestIndices[i] = _mm_set1_epi32(1);
+    }
+          
+    return bestErr;
+  }
 
-	// Now that we know the index of each pixel, we can assign the endpoints based on a least squares fit
-	// of the clusters. For more information, take a look at this article by NVidia:
-	// http://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/dxtc/doc/cuda_dxtc.pdf
-	float asq = 0.0, bsq = 0.0, ab = 0.0;
-	RGBAVectorSIMD ax(0.0f), bx(0.0f);
-	for(int i = 0; i < nBuckets; i++) {
-		float a = float(nBuckets - 1 - i) / float(nBuckets - 1);
-		float b = float(i) / float(nBuckets - 1);
+  // Now that we know the index of each pixel, we can assign the endpoints based on a least squares fit
+  // of the clusters. For more information, take a look at this article by NVidia:
+  // http://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/dxtc/doc/cuda_dxtc.pdf
+  float asq = 0.0, bsq = 0.0, ab = 0.0;
+  RGBAVectorSIMD ax(0.0f), bx(0.0f);
+  for(int i = 0; i < nBuckets; i++) {
+    float a = float(nBuckets - 1 - i) / float(nBuckets - 1);
+    float b = float(i) / float(nBuckets - 1);
 
-		float n = numPts[i];
-		RGBAVectorSIMD x = pts[i];
+    float n = numPts[i];
+    RGBAVectorSIMD x = pts[i];
 
-		asq += n * a * a;
-		bsq += n * b * b;
-		ab += n * a * b;
+    asq += n * a * a;
+    bsq += n * b * b;
+    ab += n * a * b;
 
-		ax += x * a * n;
-		bx += x * b * n;
-	}
+    ax += x * a * n;
+    bx += x * b * n;
+  }
 
-	float f = 1.0f / (asq * bsq - ab * ab);
-	p1 = f * (ax * bsq - bx * ab);
-	p2 = f * (bx * asq - ax * ab);
+  float f = 1.0f / (asq * bsq - ab * ab);
+  p1 = f * (ax * bsq - bx * ab);
+  p2 = f * (bx * asq - ax * ab);
 
-	ClampEndpointsToGrid(p1, p2, bestPbitCombo);
-
-	#ifdef _DEBUG
-		int pBitCombo = bestPbitCombo;
-		RGBAVectorSIMD tp1 = p1, tp2 = p2;
-		ClampEndpointsToGrid(tp1, tp2, pBitCombo);
-
-		assert(p1 == tp1);
-		assert(p2 == tp2);
-		assert(pBitCombo == bestPbitCombo);
-	#endif
-
-	assert(bestPbitCombo >= 0);
-
-	return OptimizeEndpointsForCluster(cluster, p1, p2, bestIndices, bestPbitCombo);
-}
-
-double BC7CompressionModeSIMD::Compress(BitStream &stream, const int shapeIdx, const RGBAClusterSIMD *clusters) const {	
-
-	const int kModeNumber = GetModeNumber();
-	const int nPartitionBits = GetNumberOfPartitionBits();
-	const int nSubsets = GetNumberOfSubsets();
-
-	// Mode #
-	stream.WriteBits(1 << kModeNumber, kModeNumber + 1);
-
-	// Partition #
-	assert((((1 << nPartitionBits) - 1) & shapeIdx) == shapeIdx);
-	stream.WriteBits(shapeIdx, nPartitionBits);
-		
-	RGBAVectorSIMD p1[kMaxNumSubsets], p2[kMaxNumSubsets];
-	int bestIndices[kMaxNumSubsets][kMaxNumDataPoints] = {
-		{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-		{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-		{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }
-	};
-	int bestPbitCombo[kMaxNumSubsets] = { -1, -1, -1 };
-
-	double totalErr = 0.0;
-	for(int cidx = 0; cidx < nSubsets; cidx++) {
-		ALIGN_SSE int indices[kMaxNumDataPoints];
-
-		// Compress this cluster
-		totalErr += CompressCluster(clusters[cidx], p1[cidx], p2[cidx], (__m128i *)indices, bestPbitCombo[cidx]);
-
-		// !SPEED! We can precompute the subsets for each index based on the shape. This
-		// isn't the bottleneck for the compressor, but it could prove to be a little 
-		// faster...
-
-		// Map the indices to their proper position.
-		int idx = 0;
-		for(int i = 0; i < 16; i++) {
-			int subs = GetSubsetForIndex(i, shapeIdx);
-			if(subs == cidx) {
-				bestIndices[cidx][i] = indices[idx++];
-			}
-		}
-	}
+  ClampEndpointsToGrid(p1, p2, bestPbitCombo);
 
 #ifdef _DEBUG
-	for(int i = 0; i < kMaxNumDataPoints; i++) {
+  int pBitCombo = bestPbitCombo;
+  RGBAVectorSIMD tp1 = p1, tp2 = p2;
+  ClampEndpointsToGrid(tp1, tp2, pBitCombo);
 
-		int nSet = 0;
-		for(int j = 0; j < nSubsets; j++) {
-			if(bestIndices[j][i] >= 0)
-				nSet++;
-		}
-
-		assert(nSet == 1);
-	}
+  assert(p1 == tp1);
+  assert(p2 == tp2);
+  assert(pBitCombo == bestPbitCombo);
 #endif
 
-	// Get the quantization mask
-	__m128i qmask;
-	GetQuantizationMask(qmask);
+  assert(bestPbitCombo >= 0);
 
-	//Quantize the points...
-	__m128i pixel1[kMaxNumSubsets], pixel2[kMaxNumSubsets];
-	for(int i = 0; i < nSubsets; i++) {
-		switch(GetPBitType()) {
-			default:
-			case ePBitType_None: 
-				pixel1[i] = p1[i].ToPixel(qmask); 
-				pixel2[i] = p2[i].ToPixel(qmask); 
-			break;
+  return OptimizeEndpointsForCluster(cluster, p1, p2, bestIndices, bestPbitCombo);
+}
 
-			case ePBitType_Shared: 
-			case ePBitType_NotShared: 
-				pixel1[i] = p1[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[0]); 
-				pixel2[i] = p2[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[1]); 
-			break;
-		}
-	}
+double BC7CompressionModeSIMD::Compress(BitStream &stream, const int shapeIdx, const RGBAClusterSIMD *clusters) const { 
 
-	// If the anchor index does not have 0 in the leading bit, then 
-	// we need to swap EVERYTHING.
-	for(int sidx = 0; sidx < nSubsets; sidx++) {
+  const int kModeNumber = GetModeNumber();
+  const int nPartitionBits = GetNumberOfPartitionBits();
+  const int nSubsets = GetNumberOfSubsets();
 
-		int anchorIdx = GetAnchorIndexForSubset(sidx, shapeIdx);
-		assert(bestIndices[sidx][anchorIdx] != -1);
+  // Mode #
+  stream.WriteBits(1 << kModeNumber, kModeNumber + 1);
 
-		int nIndexBits = GetNumberOfBitsPerIndex();
-		if(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)) {
-			__m128i t = pixel1[sidx]; pixel1[sidx] = pixel2[sidx]; pixel2[sidx] = t;
+  // Partition #
+  assert((((1 << nPartitionBits) - 1) & shapeIdx) == shapeIdx);
+  stream.WriteBits(shapeIdx, nPartitionBits);
+        
+  RGBAVectorSIMD p1[kMaxNumSubsets], p2[kMaxNumSubsets];
+  int bestIndices[kMaxNumSubsets][kMaxNumDataPoints] = {
+    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
+    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }
+  };
+  int bestPbitCombo[kMaxNumSubsets] = { -1, -1, -1 };
 
-			int nIndexVals = 1 << nIndexBits;
-			for(int i = 0; i < 16; i++) {
-				bestIndices[sidx][i] = (nIndexVals - 1) - bestIndices[sidx][i];
-			}
-		}
+  double totalErr = 0.0;
+  for(int cidx = 0; cidx < nSubsets; cidx++) {
+    ALIGN_SSE int indices[kMaxNumDataPoints];
 
-		assert(!(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)));
-	}
+    // Compress this cluster
+    totalErr += CompressCluster(clusters[cidx], p1[cidx], p2[cidx], (__m128i *)indices, bestPbitCombo[cidx]);
 
-	// Get the quantized values...
-	uint8 r1[kMaxNumSubsets], g1[kMaxNumSubsets], b1[kMaxNumSubsets], a1[kMaxNumSubsets];
-	uint8 r2[kMaxNumSubsets], g2[kMaxNumSubsets], b2[kMaxNumSubsets], a2[kMaxNumSubsets];
-	for(int i = 0; i < nSubsets; i++) {
-		r1[i] = ((uint8 *)(&(pixel1[i])))[0];
-		r2[i] = ((uint8 *)(&(pixel2[i])))[0];
+    // !SPEED! We can precompute the subsets for each index based on the shape. This
+    // isn't the bottleneck for the compressor, but it could prove to be a little 
+    // faster...
 
-		g1[i] = ((uint8 *)(&(pixel1[i])))[4];
-		g2[i] = ((uint8 *)(&(pixel2[i])))[4];
+    // Map the indices to their proper position.
+    int idx = 0;
+    for(int i = 0; i < 16; i++) {
+      int subs = GetSubsetForIndex(i, shapeIdx);
+      if(subs == cidx) {
+        bestIndices[cidx][i] = indices[idx++];
+      }
+    }
+  }
 
-		b1[i] = ((uint8 *)(&(pixel1[i])))[8];
-		b2[i] = ((uint8 *)(&(pixel2[i])))[8];
+#ifdef _DEBUG
+  for(int i = 0; i < kMaxNumDataPoints; i++) {
 
-		a1[i] = ((uint8 *)(&(pixel1[i])))[12];
-		a2[i] = ((uint8 *)(&(pixel2[i])))[12];
-	}
+    int nSet = 0;
+    for(int j = 0; j < nSubsets; j++) {
+      if(bestIndices[j][i] >= 0)
+        nSet++;
+    }
 
-	// Write them out...
-	const int nRedBits = GetRedChannelPrecision();
-	for(int i = 0; i < nSubsets; i++) {
-		stream.WriteBits(r1[i] >> (8 - nRedBits), nRedBits);
-		stream.WriteBits(r2[i] >> (8 - nRedBits), nRedBits);
-	}
+    assert(nSet == 1);
+  }
+#endif
 
-	const int nGreenBits = GetGreenChannelPrecision();
-	for(int i = 0; i < nSubsets; i++) {
-		stream.WriteBits(g1[i] >> (8 - nGreenBits), nGreenBits);
-		stream.WriteBits(g2[i] >> (8 - nGreenBits), nGreenBits);
-	}
+  // Get the quantization mask
+  __m128i qmask;
+  GetQuantizationMask(qmask);
 
-	const int nBlueBits = GetBlueChannelPrecision();
-	for(int i = 0; i < nSubsets; i++) {
-		stream.WriteBits(b1[i] >> (8 - nBlueBits), nBlueBits);
-		stream.WriteBits(b2[i] >> (8 - nBlueBits), nBlueBits);
-	}
+  //Quantize the points...
+  __m128i pixel1[kMaxNumSubsets], pixel2[kMaxNumSubsets];
+  for(int i = 0; i < nSubsets; i++) {
+    switch(GetPBitType()) {
+    default:
+    case ePBitType_None: 
+      pixel1[i] = p1[i].ToPixel(qmask); 
+      pixel2[i] = p2[i].ToPixel(qmask); 
+      break;
 
-	const int nAlphaBits = GetAlphaChannelPrecision();
-	for(int i = 0; i < nSubsets; i++) {
-		stream.WriteBits(a1[i] >> (8 - nAlphaBits), nAlphaBits);
-		stream.WriteBits(a2[i] >> (8 - nAlphaBits), nAlphaBits);
-	}
+    case ePBitType_Shared: 
+    case ePBitType_NotShared: 
+      pixel1[i] = p1[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[0]); 
+      pixel2[i] = p2[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[1]); 
+      break;
+    }
+  }
 
-	// Write out the best pbits..
-	if(GetPBitType() != ePBitType_None) {
-		for(int s = 0; s < nSubsets; s++) {
-			const int *pbits = GetPBitCombo(bestPbitCombo[s]);
-			stream.WriteBits(pbits[0], 1);
-			if(GetPBitType() != ePBitType_Shared)
-				stream.WriteBits(pbits[1], 1);
-		}
-	}
+  // If the anchor index does not have 0 in the leading bit, then 
+  // we need to swap EVERYTHING.
+  for(int sidx = 0; sidx < nSubsets; sidx++) {
 
-	for(int i = 0; i < 16; i++) {
-		const int subs = GetSubsetForIndex(i, shapeIdx);
-		const int idx = bestIndices[subs][i];
-		const int anchorIdx = GetAnchorIndexForSubset(subs, shapeIdx);
-		const int nBitsForIdx = GetNumberOfBitsPerIndex();
-		assert(idx >= 0 && idx < (1 << nBitsForIdx));
-		assert(i != anchorIdx || !(idx >> (nBitsForIdx - 1)) || !"Leading bit of anchor index is not zero!");
-		stream.WriteBits(idx, (i == anchorIdx)? nBitsForIdx - 1 : nBitsForIdx);
-	}
+    int anchorIdx = GetAnchorIndexForSubset(sidx, shapeIdx);
+    assert(bestIndices[sidx][anchorIdx] != -1);
 
-	assert(stream.GetBitsWritten() == 128);
-	return totalErr;
+    int nIndexBits = GetNumberOfBitsPerIndex();
+    if(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)) {
+      __m128i t = pixel1[sidx]; pixel1[sidx] = pixel2[sidx]; pixel2[sidx] = t;
+
+      int nIndexVals = 1 << nIndexBits;
+      for(int i = 0; i < 16; i++) {
+        bestIndices[sidx][i] = (nIndexVals - 1) - bestIndices[sidx][i];
+      }
+    }
+
+    assert(!(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)));
+  }
+
+  // Get the quantized values...
+  uint8 r1[kMaxNumSubsets], g1[kMaxNumSubsets], b1[kMaxNumSubsets], a1[kMaxNumSubsets];
+  uint8 r2[kMaxNumSubsets], g2[kMaxNumSubsets], b2[kMaxNumSubsets], a2[kMaxNumSubsets];
+  for(int i = 0; i < nSubsets; i++) {
+    r1[i] = ((uint8 *)(&(pixel1[i])))[0];
+    r2[i] = ((uint8 *)(&(pixel2[i])))[0];
+
+    g1[i] = ((uint8 *)(&(pixel1[i])))[4];
+    g2[i] = ((uint8 *)(&(pixel2[i])))[4];
+
+    b1[i] = ((uint8 *)(&(pixel1[i])))[8];
+    b2[i] = ((uint8 *)(&(pixel2[i])))[8];
+
+    a1[i] = ((uint8 *)(&(pixel1[i])))[12];
+    a2[i] = ((uint8 *)(&(pixel2[i])))[12];
+  }
+
+  // Write them out...
+  const int nRedBits = GetRedChannelPrecision();
+  for(int i = 0; i < nSubsets; i++) {
+    stream.WriteBits(r1[i] >> (8 - nRedBits), nRedBits);
+    stream.WriteBits(r2[i] >> (8 - nRedBits), nRedBits);
+  }
+
+  const int nGreenBits = GetGreenChannelPrecision();
+  for(int i = 0; i < nSubsets; i++) {
+    stream.WriteBits(g1[i] >> (8 - nGreenBits), nGreenBits);
+    stream.WriteBits(g2[i] >> (8 - nGreenBits), nGreenBits);
+  }
+
+  const int nBlueBits = GetBlueChannelPrecision();
+  for(int i = 0; i < nSubsets; i++) {
+    stream.WriteBits(b1[i] >> (8 - nBlueBits), nBlueBits);
+    stream.WriteBits(b2[i] >> (8 - nBlueBits), nBlueBits);
+  }
+
+  const int nAlphaBits = GetAlphaChannelPrecision();
+  for(int i = 0; i < nSubsets; i++) {
+    stream.WriteBits(a1[i] >> (8 - nAlphaBits), nAlphaBits);
+    stream.WriteBits(a2[i] >> (8 - nAlphaBits), nAlphaBits);
+  }
+
+  // Write out the best pbits..
+  if(GetPBitType() != ePBitType_None) {
+    for(int s = 0; s < nSubsets; s++) {
+      const int *pbits = GetPBitCombo(bestPbitCombo[s]);
+      stream.WriteBits(pbits[0], 1);
+      if(GetPBitType() != ePBitType_Shared)
+        stream.WriteBits(pbits[1], 1);
+    }
+  }
+
+  for(int i = 0; i < 16; i++) {
+    const int subs = GetSubsetForIndex(i, shapeIdx);
+    const int idx = bestIndices[subs][i];
+    const int anchorIdx = GetAnchorIndexForSubset(subs, shapeIdx);
+    const int nBitsForIdx = GetNumberOfBitsPerIndex();
+    assert(idx >= 0 && idx < (1 << nBitsForIdx));
+    assert(i != anchorIdx || !(idx >> (nBitsForIdx - 1)) || !"Leading bit of anchor index is not zero!");
+    stream.WriteBits(idx, (i == anchorIdx)? nBitsForIdx - 1 : nBitsForIdx);
+  }
+
+  assert(stream.GetBitsWritten() == 128);
+  return totalErr;
 }
 
 namespace BC7C
 {
-	// Function prototypes
-	static void ExtractBlock(const uint8* inPtr, int width, uint32* colorBlock);
-	static void CompressBC7Block(const uint32 *block, uint8 *outBuf);
-
-	// Returns true if the entire block is a single color.
-	static bool AllOneColor(const uint32 block[16]) {
-		const uint32 pixel = block[0];
-		for(int i = 1; i < 16; i++) {
-			if( block[i] != pixel )
-				return false;
-		}
-
-		return true;
-	}
-
-	// Write out a transparent block.
-	static void WriteTransparentBlock(BitStream &stream) {
-		// Use mode 6
-		stream.WriteBits(1 << 6, 7);
-		stream.WriteBits(0, 128-7);
-		assert(stream.GetBitsWritten() == 128);
-	}
-
-	// Compresses a single color optimally and outputs the result.
-	static void CompressOptimalColorBC7(uint32 pixel, BitStream &stream) {
-
-		stream.WriteBits(1 << 5, 6); // Mode 5
-		stream.WriteBits(0, 2); // No rotation bits.
-
-		uint8 r = pixel & 0xFF;
-		uint8 g = (pixel >> 8) & 0xFF;
-		uint8 b = (pixel >> 16) & 0xFF;
-		uint8 a = (pixel >> 24) & 0xFF;
-
-		// Red endpoints
-		stream.WriteBits(Optimal7CompressBC7Mode5[r][0], 7);
-		stream.WriteBits(Optimal7CompressBC7Mode5[r][1], 7);
-
-		// Green endpoints
-		stream.WriteBits(Optimal7CompressBC7Mode5[g][0], 7);
-		stream.WriteBits(Optimal7CompressBC7Mode5[g][1], 7);
-
-		// Blue endpoints
-		stream.WriteBits(Optimal7CompressBC7Mode5[b][0], 7);
-		stream.WriteBits(Optimal7CompressBC7Mode5[b][1], 7);
-
-		// Alpha endpoints... are just the same.
-		stream.WriteBits(a, 8);
-		stream.WriteBits(a, 8);
-		
-		// Color indices are 1 for each pixel...
-		// Anchor index is 0, so 1 bit for the first pixel, then
-		// 01 for each following pixel giving the sequence of 31 bits:
-		// ...010101011
-		stream.WriteBits(0xaaaaaaab, 31);
-
-		// Alpha indices...
-		stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31); 
-	}
-
-	// Compress an image using BC7 compression. Use the inBuf parameter to point to an image in
-	// 4-byte RGBA format. The width and height parameters specify the size of the image in pixels.
-	// The buffer pointed to by outBuf should be large enough to store the compressed image. This
-	// implementation has an 4:1 compression ratio.
-	void CompressImageBC7SIMD(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height)
-	{
-		ALIGN_SSE uint32 block[16];
-
-		_MM_SET_ROUNDING_MODE( _MM_ROUND_TOWARD_ZERO );
-		BC7CompressionModeSIMD::ResetNumUses();
-
-		BC7CompressionModeSIMD::MaxAnnealingIterations = GetQualityLevel();
-
-		for(int j = 0; j < height; j += 4)
-		{
-			for(int i = 0; i < width; i += 4)
-			{
-			  CompressBC7Block((const uint32 *)inBuf, outBuf);
-
-			  outBuf += 16;
-			  inBuf += 64;
-			}
-		}
-	}
-
-	// Extract a 4 by 4 block of pixels from inPtr and store it in colorBlock. The width parameter
-	// specifies the size of the image in pixels.
-	static void ExtractBlock(const uint8* inPtr, int width, uint32* colorBlock)
-	{
-		// Compute the stride.
-		const int stride = width * 4;
-
-		// Copy the first row of pixels from inPtr into colorBlock.
-		_mm_store_si128((__m128i*)colorBlock, _mm_load_si128((__m128i*)inPtr));
-		inPtr += stride;
-
-		// Copy the second row of pixels from inPtr into colorBlock.
-		_mm_store_si128((__m128i*)(colorBlock + 4), _mm_load_si128((__m128i*)inPtr));
-		inPtr += stride;
-
-		// Copy the third row of pixels from inPtr into colorBlock.
-		_mm_store_si128((__m128i*)(colorBlock + 8), _mm_load_si128((__m128i*)inPtr));
-		inPtr += stride;
-
-		// Copy the forth row of pixels from inPtr into colorBlock.
-		_mm_store_si128((__m128i*)(colorBlock + 12), _mm_load_si128((__m128i*)inPtr));
-	}
-
-	static double CompressTwoClusters(int shapeIdx, const RGBAClusterSIMD *clusters, uint8 *outBuf, double estimatedError) {
-
-		uint8 tempBuf1[16];
-		BitStream tmpStream1(tempBuf1, 128, 0);
-		BC7CompressionModeSIMD compressor1(1, estimatedError);
-			
-		double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters);
-		memcpy(outBuf, tempBuf1, 16);
-		if(bestError == 0.0) {
-			return 0.0;
-		}
-
-		uint8 tempBuf3[16];
-		BitStream tmpStream3(tempBuf3, 128, 0);
-		BC7CompressionModeSIMD compressor3(3, estimatedError);
-
-		double error;
-		if((error = compressor3.Compress(tmpStream3, shapeIdx, clusters)) < bestError) {
-			bestError = error;
-			memcpy(outBuf, tempBuf3, 16);
-			if(bestError == 0.0) {
-				return 0.0;
-			}
-		}
-		
-		// Mode 3 offers more precision for RGB data. Mode 7 is really only if we have alpha.
-		//uint8 tempBuf7[16];
-		//BitStream tmpStream7(tempBuf7, 128, 0);
-		//BC7CompressionModeSIMD compressor7(7, estimatedError);		
-		//if((error = compressor7.Compress(tmpStream7, shapeIdx, clusters)) < bestError) {
-		//	memcpy(outBuf, tempBuf7, 16);
-		//	return error;
-		//}
-
-		return bestError;
-	}
-
-	static double CompressThreeClusters(int shapeIdx, const RGBAClusterSIMD *clusters, uint8 *outBuf, double estimatedError) {
-
-		uint8 tempBuf0[16];
-		BitStream tmpStream0(tempBuf0, 128, 0);
-
-		uint8 tempBuf2[16];
-		BitStream tmpStream2(tempBuf2, 128, 0);
-
-		BC7CompressionModeSIMD compressor0(0, estimatedError);
-		BC7CompressionModeSIMD compressor2(2, estimatedError);
-			
-		double error, bestError = (shapeIdx < 16)? compressor0.Compress(tmpStream0, shapeIdx, clusters) : DBL_MAX;
-		memcpy(outBuf, tempBuf0, 16);
-		if(bestError == 0.0) {
-			return 0.0;
-		}
-
-		if((error = compressor2.Compress(tmpStream2, shapeIdx, clusters)) < bestError) {
-			memcpy(outBuf, tempBuf2, 16);
-			return error;
-		}
-
-		return bestError;
-	}
-
-	static void PopulateTwoClustersForShape(const RGBAClusterSIMD &points, int shapeIdx, RGBAClusterSIMD *clusters) {
-		const uint16 shape = kShapeMask2[shapeIdx]; 
-		for(int pt = 0; pt < kMaxNumDataPoints; pt++) {
-
-			const RGBAVectorSIMD &p = points.GetPoint(pt);
-
-			if((1 << pt) & shape)
-				clusters[1].AddPoint(p, pt);
-			else
-				clusters[0].AddPoint(p, pt);
-		}
-
-		assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString()));
-		assert((clusters[0].GetPointBitString() ^ clusters[1].GetPointBitString()) == 0xFFFF);
-		assert((shape & clusters[1].GetPointBitString()) == shape);
-	}
-
-	static void PopulateThreeClustersForShape(const RGBAClusterSIMD &points, int shapeIdx, RGBAClusterSIMD *clusters) {
-		for(int pt = 0; pt < kMaxNumDataPoints; pt++) {
-
-			const RGBAVectorSIMD &p = points.GetPoint(pt);
-
-			if((1 << pt) & kShapeMask3[shapeIdx][0]) {
-				if((1 << pt) & kShapeMask3[shapeIdx][1])
-					clusters[2].AddPoint(p, pt);
-				else
-					clusters[1].AddPoint(p, pt);
-			}
-			else
-				clusters[0].AddPoint(p, pt);
-		}
-
-		assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString()));
-		assert(!(clusters[2].GetPointBitString() & clusters[1].GetPointBitString()));
-		assert(!(clusters[0].GetPointBitString() & clusters[2].GetPointBitString()));
-	}
-
-	static double EstimateTwoClusterError(RGBAClusterSIMD &c) {
-		RGBAVectorSIMD Min, Max, v;
-		c.GetBoundingBox(Min, Max);
-		v = Max - Min;
-		if(v * v == 0) {
-			return 0.0;
-		}
-
-		return 0.0001 + c.QuantizedError(Min, Max, 8, _mm_set1_epi32(0xFF));
-	}
-
-	static double EstimateThreeClusterError(RGBAClusterSIMD &c) {
-		RGBAVectorSIMD Min, Max, v;
-		c.GetBoundingBox(Min, Max);
-		v = Max - Min;
-		if(v * v == 0) {
-			return 0.0;
-		}
-
-		return 0.0001 + c.QuantizedError(Min, Max, 4, _mm_set1_epi32(0xFF));
-	}
-
-	// Compress a single block.
-	void CompressBC7Block(const uint32 *block, uint8 *outBuf) {
-		
-		// All a single color?
-		if(AllOneColor(block)) {
-			BitStream bStrm(outBuf, 128, 0);
-			CompressOptimalColorBC7(*((const uint32 *)block), bStrm);
-			return;
-		}		
-
-		RGBAClusterSIMD blockCluster;
-		bool opaque = true;
-		bool transparent = true;
-
-		for(int i = 0; i < kMaxNumDataPoints; i++) {
-			RGBAVectorSIMD p = RGBAVectorSIMD(block[i]);
-			blockCluster.AddPoint(p, i);
-			if(fabs(p.a - 255.0f) > 1e-10)
-				opaque = false;
-
-			if(p.a > 0.0f)
-				transparent = false;
-		}
-
-		// The whole block is transparent?
-		if(transparent) {
-			BitStream bStrm(outBuf, 128, 0);
-			WriteTransparentBlock(bStrm);
-			return;
-		}
-
-		// First we must figure out which shape to use. To do this, simply
-		// see which shape has the smallest sum of minimum bounding spheres.
-		double bestError[2] = { DBL_MAX, DBL_MAX };
-		int bestShapeIdx[2] = { -1, -1 };
-		RGBAClusterSIMD bestClusters[2][3];
-
-		for(int i = 0; i < kNumShapes2; i++) 
-		{
-			RGBAClusterSIMD clusters[2];
-			PopulateTwoClustersForShape(blockCluster, i, clusters);
-
-			double err = 0.0;
-			for(int ci = 0; ci < 2; ci++) {
-				err += EstimateTwoClusterError(clusters[ci]);
-			}
-
-			// If it's small, we'll take it!
-			if(err < 1e-9) {
-				CompressTwoClusters(i, clusters, outBuf, err);
-				return;
-			}
-
-			if(err < bestError[0]) {
-				bestError[0] = err;
-				bestShapeIdx[0] = i;
-				bestClusters[0][0] = clusters[0];
-				bestClusters[0][1] = clusters[1];
-			}
-		}
-
-		// There are not 3 subset blocks that support alpha...
-		if(opaque) {
-			for(int i = 0; i < kNumShapes3; i++) {
-
-				RGBAClusterSIMD clusters[3];
-				PopulateThreeClustersForShape(blockCluster, i, clusters);
-
-				double err = 0.0;
-				for(int ci = 0; ci < 3; ci++) {
-					err += EstimateThreeClusterError(clusters[ci]);
-				}
-
-				// If it's small, we'll take it!
-				if(err < 1e-9) {
-					CompressThreeClusters(i, clusters, outBuf, err);
-					return;
-				}
-
-				if(err < bestError[1]) {
-					bestError[1] = err;
-					bestShapeIdx[1] = i;
-					bestClusters[1][0] = clusters[0];
-					bestClusters[1][1] = clusters[1];
-					bestClusters[1][2] = clusters[2];
-				}
-			}
-		}
-
-		if(opaque) {
-
-			uint8 tempBuf1[16];
-			uint8 tempBuf2[16];
-
-			BitStream tempStream1 (tempBuf1, 128, 0);
-			BC7CompressionModeSIMD compressor(6, DBL_MAX);
-			double best = compressor.Compress(tempStream1, 0, &blockCluster);
-			if(best == 0.0f) {
-				memcpy(outBuf, tempBuf1, 16);
-				return;
-			}
-
-			double error = DBL_MAX;
-			if((error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, bestError[0])) < best) {
-				best = error;
-				if(error == 0.0f) {
-					memcpy(outBuf, tempBuf2, 16);
-					return;
-				}
-				else {
-					memcpy(tempBuf1, tempBuf2, 16);
-				}
-			}
-
-			if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, bestError[1]) < best) {
-				memcpy(outBuf, tempBuf2, 16);
-				return;
-			}
-
-			memcpy(outBuf, tempBuf1, 16);
-		}
-		else {
-			assert(!"Don't support alpha yet!");
-		}
-	}
+  // Function prototypes
+  static void ExtractBlock(const uint8* inPtr, int width, uint32* colorBlock);
+  static void CompressBC7Block(const uint32 *block, uint8 *outBuf);
+
+  // Returns true if the entire block is a single color.
+  static bool AllOneColor(const uint32 block[16]) {
+    const uint32 pixel = block[0];
+    for(int i = 1; i < 16; i++) {
+      if( block[i] != pixel )
+        return false;
+    }
+
+    return true;
+  }
+
+  // Write out a transparent block.
+  static void WriteTransparentBlock(BitStream &stream) {
+    // Use mode 6
+    stream.WriteBits(1 << 6, 7);
+    stream.WriteBits(0, 128-7);
+    assert(stream.GetBitsWritten() == 128);
+  }
+
+  // Compresses a single color optimally and outputs the result.
+  static void CompressOptimalColorBC7(uint32 pixel, BitStream &stream) {
+
+    stream.WriteBits(1 << 5, 6); // Mode 5
+    stream.WriteBits(0, 2); // No rotation bits.
+
+    uint8 r = pixel & 0xFF;
+    uint8 g = (pixel >> 8) & 0xFF;
+    uint8 b = (pixel >> 16) & 0xFF;
+    uint8 a = (pixel >> 24) & 0xFF;
+
+    // Red endpoints
+    stream.WriteBits(Optimal7CompressBC7Mode5[r][0], 7);
+    stream.WriteBits(Optimal7CompressBC7Mode5[r][1], 7);
+
+    // Green endpoints
+    stream.WriteBits(Optimal7CompressBC7Mode5[g][0], 7);
+    stream.WriteBits(Optimal7CompressBC7Mode5[g][1], 7);
+
+    // Blue endpoints
+    stream.WriteBits(Optimal7CompressBC7Mode5[b][0], 7);
+    stream.WriteBits(Optimal7CompressBC7Mode5[b][1], 7);
+
+    // Alpha endpoints... are just the same.
+    stream.WriteBits(a, 8);
+    stream.WriteBits(a, 8);
+        
+    // Color indices are 1 for each pixel...
+    // Anchor index is 0, so 1 bit for the first pixel, then
+    // 01 for each following pixel giving the sequence of 31 bits:
+    // ...010101011
+    stream.WriteBits(0xaaaaaaab, 31);
+
+    // Alpha indices...
+    stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31); 
+  }
+
+  // Compress an image using BC7 compression. Use the inBuf parameter to point to an image in
+  // 4-byte RGBA format. The width and height parameters specify the size of the image in pixels.
+  // The buffer pointed to by outBuf should be large enough to store the compressed image. This
+  // implementation has a 4:1 compression ratio.
+  void CompressImageBC7SIMD(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height)
+  {
+    ALIGN_SSE uint32 block[16];
+
+    _MM_SET_ROUNDING_MODE( _MM_ROUND_TOWARD_ZERO );
+    BC7CompressionModeSIMD::ResetNumUses();
+
+    BC7CompressionModeSIMD::MaxAnnealingIterations = GetQualityLevel();
+
+    for(int j = 0; j < height; j += 4) {
+      for(int i = 0; i < width; i += 4) {
+        CompressBC7Block((const uint32 *)inBuf, outBuf);
+
+        outBuf += 16;
+        inBuf += 64;
+      }
+    }
+  }
+
+  // Extract a 4 by 4 block of pixels from inPtr and store it in colorBlock. The width parameter
+  // specifies the size of the image in pixels.
+  static void ExtractBlock(const uint8* inPtr, int width, uint32* colorBlock)
+  {
+    // Compute the stride.
+    const int stride = width * 4;
+
+    // Copy the first row of pixels from inPtr into colorBlock.
+    _mm_store_si128((__m128i*)colorBlock, _mm_load_si128((__m128i*)inPtr));
+    inPtr += stride;
+
+    // Copy the second row of pixels from inPtr into colorBlock.
+    _mm_store_si128((__m128i*)(colorBlock + 4), _mm_load_si128((__m128i*)inPtr));
+    inPtr += stride;
+
+    // Copy the third row of pixels from inPtr into colorBlock.
+    _mm_store_si128((__m128i*)(colorBlock + 8), _mm_load_si128((__m128i*)inPtr));
+    inPtr += stride;
+
+    // Copy the fourth row of pixels from inPtr into colorBlock.
+    _mm_store_si128((__m128i*)(colorBlock + 12), _mm_load_si128((__m128i*)inPtr));
+  }
+
+  static double CompressTwoClusters(int shapeIdx, const RGBAClusterSIMD *clusters, uint8 *outBuf, double estimatedError) {
+
+    uint8 tempBuf1[16];
+    BitStream tmpStream1(tempBuf1, 128, 0);
+    BC7CompressionModeSIMD compressor1(1, estimatedError);
+            
+    double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters);
+    memcpy(outBuf, tempBuf1, 16);
+    if(bestError == 0.0) {
+      return 0.0;
+    }
+
+    uint8 tempBuf3[16];
+    BitStream tmpStream3(tempBuf3, 128, 0);
+    BC7CompressionModeSIMD compressor3(3, estimatedError);
+
+    double error;
+    if((error = compressor3.Compress(tmpStream3, shapeIdx, clusters)) < bestError) {
+      bestError = error;
+      memcpy(outBuf, tempBuf3, 16);
+      if(bestError == 0.0) {
+        return 0.0;
+      }
+    }
+        
+    // Mode 3 offers more precision for RGB data. Mode 7 is really only useful if we have alpha.
+    //uint8 tempBuf7[16];
+    //BitStream tmpStream7(tempBuf7, 128, 0);
+    //BC7CompressionModeSIMD compressor7(7, estimatedError);        
+    //if((error = compressor7.Compress(tmpStream7, shapeIdx, clusters)) < bestError) {
+    //  memcpy(outBuf, tempBuf7, 16);
+    //  return error;
+    //}
+
+    return bestError;
+  }
+
+  static double CompressThreeClusters(int shapeIdx, const RGBAClusterSIMD *clusters, uint8 *outBuf, double estimatedError) {
+
+    uint8 tempBuf0[16];
+    BitStream tmpStream0(tempBuf0, 128, 0);
+
+    uint8 tempBuf2[16];
+    BitStream tmpStream2(tempBuf2, 128, 0);
+
+    BC7CompressionModeSIMD compressor0(0, estimatedError);
+    BC7CompressionModeSIMD compressor2(2, estimatedError);
+            
+    double error, bestError = (shapeIdx < 16)? compressor0.Compress(tmpStream0, shapeIdx, clusters) : DBL_MAX;
+    memcpy(outBuf, tempBuf0, 16);
+    if(bestError == 0.0) {
+      return 0.0;
+    }
+
+    if((error = compressor2.Compress(tmpStream2, shapeIdx, clusters)) < bestError) {
+      memcpy(outBuf, tempBuf2, 16);
+      return error;
+    }
+
+    return bestError;
+  }
+
+  static void PopulateTwoClustersForShape(const RGBAClusterSIMD &points, int shapeIdx, RGBAClusterSIMD *clusters) {
+    const uint16 shape = kShapeMask2[shapeIdx]; 
+    for(int pt = 0; pt < kMaxNumDataPoints; pt++) {
+
+      const RGBAVectorSIMD &p = points.GetPoint(pt);
+
+      if((1 << pt) & shape)
+        clusters[1].AddPoint(p, pt);
+      else
+        clusters[0].AddPoint(p, pt);
+    }
+
+    assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString()));
+    assert((clusters[0].GetPointBitString() ^ clusters[1].GetPointBitString()) == 0xFFFF);
+    assert((shape & clusters[1].GetPointBitString()) == shape);
+  }
+
+  static void PopulateThreeClustersForShape(const RGBAClusterSIMD &points, int shapeIdx, RGBAClusterSIMD *clusters) {
+    for(int pt = 0; pt < kMaxNumDataPoints; pt++) {
+
+      const RGBAVectorSIMD &p = points.GetPoint(pt);
+
+      if((1 << pt) & kShapeMask3[shapeIdx][0]) {
+        if((1 << pt) & kShapeMask3[shapeIdx][1])
+          clusters[2].AddPoint(p, pt);
+        else
+          clusters[1].AddPoint(p, pt);
+      }
+      else
+        clusters[0].AddPoint(p, pt);
+    }
+
+    assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString()));
+    assert(!(clusters[2].GetPointBitString() & clusters[1].GetPointBitString()));
+    assert(!(clusters[0].GetPointBitString() & clusters[2].GetPointBitString()));
+  }
+
+  static double EstimateTwoClusterError(RGBAClusterSIMD &c) {
+    RGBAVectorSIMD Min, Max, v;
+    c.GetBoundingBox(Min, Max);
+    v = Max - Min;
+    if(v * v == 0) {
+      return 0.0;
+    }
+
+    return 0.0001 + c.QuantizedError(Min, Max, 8, _mm_set1_epi32(0xFF));
+  }
+
+  static double EstimateThreeClusterError(RGBAClusterSIMD &c) {
+    RGBAVectorSIMD Min, Max, v;
+    c.GetBoundingBox(Min, Max);
+    v = Max - Min;
+    if(v * v == 0) {
+      return 0.0;
+    }
+
+    return 0.0001 + c.QuantizedError(Min, Max, 4, _mm_set1_epi32(0xFF));
+  }
+
+  // Compress a single block.
+  void CompressBC7Block(const uint32 *block, uint8 *outBuf) {
+        
+    // All a single color?
+    if(AllOneColor(block)) {
+      BitStream bStrm(outBuf, 128, 0);
+      CompressOptimalColorBC7(*((const uint32 *)block), bStrm);
+      return;
+    }       
+
+    RGBAClusterSIMD blockCluster;
+    bool opaque = true;
+    bool transparent = true;
+
+    for(int i = 0; i < kMaxNumDataPoints; i++) {
+      RGBAVectorSIMD p = RGBAVectorSIMD(block[i]);
+      blockCluster.AddPoint(p, i);
+      if(fabs(p.a - 255.0f) > 1e-10)
+        opaque = false;
+
+      if(p.a > 0.0f)
+        transparent = false;
+    }
+
+    // The whole block is transparent?
+    if(transparent) {
+      BitStream bStrm(outBuf, 128, 0);
+      WriteTransparentBlock(bStrm);
+      return;
+    }
+
+    // First we must figure out which shape to use. To do this, simply
+    // see which shape has the smallest sum of minimum bounding spheres.
+    double bestError[2] = { DBL_MAX, DBL_MAX };
+    int bestShapeIdx[2] = { -1, -1 };
+    RGBAClusterSIMD bestClusters[2][3];
+
+    for(int i = 0; i < kNumShapes2; i++) {
+      RGBAClusterSIMD clusters[2];
+      PopulateTwoClustersForShape(blockCluster, i, clusters);
+
+      double err = 0.0;
+      for(int ci = 0; ci < 2; ci++) {
+        err += EstimateTwoClusterError(clusters[ci]);
+      }
+
+      // If it's small, we'll take it!
+      if(err < 1e-9) {
+        CompressTwoClusters(i, clusters, outBuf, err);
+        return;
+      }
+
+      if(err < bestError[0]) {
+        bestError[0] = err;
+        bestShapeIdx[0] = i;
+        bestClusters[0][0] = clusters[0];
+        bestClusters[0][1] = clusters[1];
+      }
+    }
+
+    // There are no 3-subset block modes that support alpha...
+    if(opaque) {
+      for(int i = 0; i < kNumShapes3; i++) {
+
+        RGBAClusterSIMD clusters[3];
+        PopulateThreeClustersForShape(blockCluster, i, clusters);
+
+        double err = 0.0;
+        for(int ci = 0; ci < 3; ci++) {
+          err += EstimateThreeClusterError(clusters[ci]);
+        }
+
+        // If it's small, we'll take it!
+        if(err < 1e-9) {
+          CompressThreeClusters(i, clusters, outBuf, err);
+          return;
+        }
+
+        if(err < bestError[1]) {
+          bestError[1] = err;
+          bestShapeIdx[1] = i;
+          bestClusters[1][0] = clusters[0];
+          bestClusters[1][1] = clusters[1];
+          bestClusters[1][2] = clusters[2];
+        }
+      }
+    }
+
+    if(opaque) {
+
+      uint8 tempBuf1[16];
+      uint8 tempBuf2[16];
+
+      BitStream tempStream1 (tempBuf1, 128, 0);
+      BC7CompressionModeSIMD compressor(6, DBL_MAX);
+      double best = compressor.Compress(tempStream1, 0, &blockCluster);
+      if(best == 0.0f) {
+        memcpy(outBuf, tempBuf1, 16);
+        return;
+      }
+
+      double error = DBL_MAX;
+      if((error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, bestError[0])) < best) {
+        best = error;
+        if(error == 0.0f) {
+          memcpy(outBuf, tempBuf2, 16);
+          return;
+        }
+        else {
+          memcpy(tempBuf1, tempBuf2, 16);
+        }
+      }
+
+      if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, bestError[1]) < best) {
+        memcpy(outBuf, tempBuf2, 16);
+        return;
+      }
+
+      memcpy(outBuf, tempBuf1, 16);
+    }
+    else {
+      assert(!"Don't support alpha yet!");
+    }
+  }
 }
diff --git a/BPTCEncoder/src/BCLookupTables.h b/BPTCEncoder/src/BCLookupTables.h
index 55ed783..f28ca95 100755
--- a/BPTCEncoder/src/BCLookupTables.h
+++ b/BPTCEncoder/src/BCLookupTables.h
@@ -68,315 +68,315 @@
 // tables to speed up this precision by allowing every value to be 1/3 of the way
 // between the two colors specified.
 /*
-	UINT nbits = 7;
-	UINT lastNum = -1;
-	UINT vals[255];
-	UINT valIdx = 0;
-	for(UINT i = 0; i < 256; i++) {
-		UINT num = (i >> (8 - nbits));
-		num <<= (8-nbits);
-		num |= i >> nbits;
+  UINT nbits = 7;
+  UINT lastNum = -1;
+  UINT vals[255];
+  UINT valIdx = 0;
+  for(UINT i = 0; i < 256; i++) {
+    UINT num = (i >> (8 - nbits));
+    num <<= (8-nbits);
+    num |= i >> nbits;
 
-		if(num != lastNum) {
-			lastNum = num;
-			vals[valIdx++] = num;
-		}
-	}
+    if(num != lastNum) {
+      lastNum = num;
+      vals[valIdx++] = num;
+    }
+  }
 
-	for(UINT i = 0; i < 256; i++) {
+  for(UINT i = 0; i < 256; i++) {
 
-		UINT mindist = 0xFFFFFFFF;
-		UINT minj = 0, mink = 0;
+    UINT mindist = 0xFFFFFFFF;
+    UINT minj = 0, mink = 0;
 
-		UINT tableEntry[2] = { 0, 0 };
+    UINT tableEntry[2] = { 0, 0 };
 
-		mindist = 0xFFFFFFFF;
-		minj = 0, mink = 0;
+    mindist = 0xFFFFFFFF;
+    minj = 0, mink = 0;
 
-		for(UINT j = 0; j < valIdx; j++) {
-			for(UINT k = 0; k < valIdx ; k++) {
+    for(UINT j = 0; j < valIdx; j++) {
+      for(UINT k = 0; k < valIdx ; k++) {
 
-				UINT combo = (43 * vals[j] + 21 * vals[k] + 32) >> 6;
-				UINT dist = ((i > combo) ? i - combo : combo - i);
-				if( dist < mindist )
-				{
-					mindist = dist;
-					minj = j;
-					mink = k;
-				}
-			}
-		}
+        UINT combo = (43 * vals[j] + 21 * vals[k] + 32) >> 6;
+        UINT dist = ((i > combo) ? i - combo : combo - i);
+        if( dist < mindist )
+        {
+          mindist = dist;
+          minj = j;
+          mink = k;
+        }
+      }
+    }
 
-		assert(mindist == 0);
+    assert(mindist == 0);
 
-		tableEntry[0] = vals[minj];
-		tableEntry[1] = vals[mink];
+    tableEntry[0] = vals[minj];
+    tableEntry[1] = vals[mink];
 
-		wchar_t tableEntryStr[256];
-		swprintf(tableEntryStr, 256, L"{ 0x%02x, 0x%02x },\n", 
-			tableEntry[0] >> (8 - nbits),
-			tableEntry[1] >> (8 - nbits)
-		);
-		OutputDebugString(tableEntryStr);
-	}
+    wchar_t tableEntryStr[256];
+    swprintf(tableEntryStr, 256, L"{ 0x%02x, 0x%02x },\n", 
+      tableEntry[0] >> (8 - nbits),
+      tableEntry[1] >> (8 - nbits)
+    );
+    OutputDebugString(tableEntryStr);
+  }
 */
 static unsigned char Optimal7CompressBC7Mode5[256][2] = {
-	{ 0x00, 0x00 },
-	{ 0x00, 0x01 },
-	{ 0x00, 0x03 },
-	{ 0x00, 0x04 },
-	{ 0x00, 0x06 },
-	{ 0x00, 0x07 },
-	{ 0x00, 0x09 },
-	{ 0x00, 0x0a },
-	{ 0x00, 0x0c },
-	{ 0x00, 0x0d },
-	{ 0x00, 0x0f },
-	{ 0x00, 0x10 },
-	{ 0x00, 0x12 },
-	{ 0x00, 0x14 },
-	{ 0x00, 0x15 },
-	{ 0x00, 0x17 },
-	{ 0x00, 0x18 },
-	{ 0x00, 0x1a },
-	{ 0x00, 0x1b },
-	{ 0x00, 0x1d },
-	{ 0x00, 0x1e },
-	{ 0x00, 0x20 },
-	{ 0x00, 0x21 },
-	{ 0x00, 0x23 },
-	{ 0x00, 0x24 },
-	{ 0x00, 0x26 },
-	{ 0x00, 0x27 },
-	{ 0x00, 0x29 },
-	{ 0x00, 0x2a },
-	{ 0x00, 0x2c },
-	{ 0x00, 0x2d },
-	{ 0x00, 0x2f },
-	{ 0x00, 0x30 },
-	{ 0x00, 0x32 },
-	{ 0x00, 0x34 },
-	{ 0x00, 0x35 },
-	{ 0x00, 0x37 },
-	{ 0x00, 0x38 },
-	{ 0x00, 0x3a },
-	{ 0x00, 0x3b },
-	{ 0x00, 0x3d },
-	{ 0x00, 0x3e },
-	{ 0x00, 0x40 },
-	{ 0x00, 0x41 },
-	{ 0x00, 0x42 },
-	{ 0x00, 0x44 },
-	{ 0x00, 0x45 },
-	{ 0x00, 0x47 },
-	{ 0x00, 0x48 },
-	{ 0x00, 0x4a },
-	{ 0x00, 0x4b },
-	{ 0x00, 0x4d },
-	{ 0x00, 0x4e },
-	{ 0x00, 0x50 },
-	{ 0x00, 0x52 },
-	{ 0x00, 0x53 },
-	{ 0x00, 0x55 },
-	{ 0x00, 0x56 },
-	{ 0x00, 0x58 },
-	{ 0x00, 0x59 },
-	{ 0x00, 0x5b },
-	{ 0x00, 0x5c },
-	{ 0x00, 0x5e },
-	{ 0x00, 0x5f },
-	{ 0x00, 0x61 },
-	{ 0x00, 0x62 },
-	{ 0x00, 0x64 },
-	{ 0x00, 0x65 },
-	{ 0x00, 0x67 },
-	{ 0x00, 0x68 },
-	{ 0x00, 0x6a },
-	{ 0x00, 0x6b },
-	{ 0x00, 0x6d },
-	{ 0x00, 0x6e },
-	{ 0x00, 0x70 },
-	{ 0x00, 0x72 },
-	{ 0x00, 0x73 },
-	{ 0x00, 0x75 },
-	{ 0x00, 0x76 },
-	{ 0x00, 0x78 },
-	{ 0x00, 0x79 },
-	{ 0x00, 0x7b },
-	{ 0x00, 0x7c },
-	{ 0x00, 0x7e },
-	{ 0x00, 0x7f },
-	{ 0x01, 0x7f },
-	{ 0x02, 0x7e },
-	{ 0x03, 0x7e },
-	{ 0x03, 0x7f },
-	{ 0x04, 0x7f },
-	{ 0x05, 0x7e },
-	{ 0x06, 0x7e },
-	{ 0x06, 0x7f },
-	{ 0x07, 0x7f },
-	{ 0x08, 0x7e },
-	{ 0x09, 0x7e },
-	{ 0x09, 0x7f },
-	{ 0x0a, 0x7f },
-	{ 0x0b, 0x7e },
-	{ 0x0c, 0x7e },
-	{ 0x0c, 0x7f },
-	{ 0x0d, 0x7f },
-	{ 0x0e, 0x7e },
-	{ 0x0f, 0x7d },
-	{ 0x0f, 0x7f },
-	{ 0x10, 0x7e },
-	{ 0x11, 0x7e },
-	{ 0x11, 0x7f },
-	{ 0x12, 0x7f },
-	{ 0x13, 0x7e },
-	{ 0x14, 0x7e },
-	{ 0x14, 0x7f },
-	{ 0x15, 0x7f },
-	{ 0x16, 0x7e },
-	{ 0x17, 0x7e },
-	{ 0x17, 0x7f },
-	{ 0x18, 0x7f },
-	{ 0x19, 0x7e },
-	{ 0x1a, 0x7e },
-	{ 0x1a, 0x7f },
-	{ 0x1b, 0x7f },
-	{ 0x1c, 0x7e },
-	{ 0x1d, 0x7e },
-	{ 0x1d, 0x7f },
-	{ 0x1e, 0x7f },
-	{ 0x1f, 0x7e },
-	{ 0x20, 0x7e },
-	{ 0x20, 0x7f },
-	{ 0x21, 0x7f },
-	{ 0x22, 0x7e },
-	{ 0x23, 0x7e },
-	{ 0x23, 0x7f },
-	{ 0x24, 0x7f },
-	{ 0x25, 0x7e },
-	{ 0x26, 0x7e },
-	{ 0x26, 0x7f },
-	{ 0x27, 0x7f },
-	{ 0x28, 0x7e },
-	{ 0x29, 0x7e },
-	{ 0x29, 0x7f },
-	{ 0x2a, 0x7f },
-	{ 0x2b, 0x7e },
-	{ 0x2c, 0x7e },
-	{ 0x2c, 0x7f },
-	{ 0x2d, 0x7f },
-	{ 0x2e, 0x7e },
-	{ 0x2f, 0x7d },
-	{ 0x2f, 0x7f },
-	{ 0x30, 0x7e },
-	{ 0x31, 0x7e },
-	{ 0x31, 0x7f },
-	{ 0x32, 0x7f },
-	{ 0x33, 0x7e },
-	{ 0x34, 0x7e },
-	{ 0x34, 0x7f },
-	{ 0x35, 0x7f },
-	{ 0x36, 0x7e },
-	{ 0x37, 0x7e },
-	{ 0x37, 0x7f },
-	{ 0x38, 0x7f },
-	{ 0x39, 0x7e },
-	{ 0x3a, 0x7e },
-	{ 0x3a, 0x7f },
-	{ 0x3b, 0x7f },
-	{ 0x3c, 0x7e },
-	{ 0x3d, 0x7e },
-	{ 0x3d, 0x7f },
-	{ 0x3e, 0x7f },
-	{ 0x3f, 0x7e },
-	{ 0x40, 0x7d },
-	{ 0x40, 0x7e },
-	{ 0x41, 0x7e },
-	{ 0x41, 0x7f },
-	{ 0x42, 0x7f },
-	{ 0x43, 0x7e },
-	{ 0x44, 0x7e },
-	{ 0x44, 0x7f },
-	{ 0x45, 0x7f },
-	{ 0x46, 0x7e },
-	{ 0x47, 0x7e },
-	{ 0x47, 0x7f },
-	{ 0x48, 0x7f },
-	{ 0x49, 0x7e },
-	{ 0x4a, 0x7e },
-	{ 0x4a, 0x7f },
-	{ 0x4b, 0x7f },
-	{ 0x4c, 0x7e },
-	{ 0x4d, 0x7d },
-	{ 0x4d, 0x7f },
-	{ 0x4e, 0x7e },
-	{ 0x4f, 0x7e },
-	{ 0x4f, 0x7f },
-	{ 0x50, 0x7f },
-	{ 0x51, 0x7e },
-	{ 0x52, 0x7e },
-	{ 0x52, 0x7f },
-	{ 0x53, 0x7f },
-	{ 0x54, 0x7e },
-	{ 0x55, 0x7e },
-	{ 0x55, 0x7f },
-	{ 0x56, 0x7f },
-	{ 0x57, 0x7e },
-	{ 0x58, 0x7e },
-	{ 0x58, 0x7f },
-	{ 0x59, 0x7f },
-	{ 0x5a, 0x7e },
-	{ 0x5b, 0x7e },
-	{ 0x5b, 0x7f },
-	{ 0x5c, 0x7f },
-	{ 0x5d, 0x7e },
-	{ 0x5e, 0x7e },
-	{ 0x5e, 0x7f },
-	{ 0x5f, 0x7f },
-	{ 0x60, 0x7e },
-	{ 0x61, 0x7e },
-	{ 0x61, 0x7f },
-	{ 0x62, 0x7f },
-	{ 0x63, 0x7e },
-	{ 0x64, 0x7e },
-	{ 0x64, 0x7f },
-	{ 0x65, 0x7f },
-	{ 0x66, 0x7e },
-	{ 0x67, 0x7e },
-	{ 0x67, 0x7f },
-	{ 0x68, 0x7f },
-	{ 0x69, 0x7e },
-	{ 0x6a, 0x7e },
-	{ 0x6a, 0x7f },
-	{ 0x6b, 0x7f },
-	{ 0x6c, 0x7e },
-	{ 0x6d, 0x7d },
-	{ 0x6d, 0x7f },
-	{ 0x6e, 0x7e },
-	{ 0x6f, 0x7e },
-	{ 0x6f, 0x7f },
-	{ 0x70, 0x7f },
-	{ 0x71, 0x7e },
-	{ 0x72, 0x7e },
-	{ 0x72, 0x7f },
-	{ 0x73, 0x7f },
-	{ 0x74, 0x7e },
-	{ 0x75, 0x7e },
-	{ 0x75, 0x7f },
-	{ 0x76, 0x7f },
-	{ 0x77, 0x7e },
-	{ 0x78, 0x7e },
-	{ 0x78, 0x7f },
-	{ 0x79, 0x7f },
-	{ 0x7a, 0x7e },
-	{ 0x7b, 0x7e },
-	{ 0x7b, 0x7f },
-	{ 0x7c, 0x7f },
-	{ 0x7d, 0x7e },
-	{ 0x7e, 0x7e },
-	{ 0x7e, 0x7f },
-	{ 0x7f, 0x7f }
+  { 0x00, 0x00 },
+  { 0x00, 0x01 },
+  { 0x00, 0x03 },
+  { 0x00, 0x04 },
+  { 0x00, 0x06 },
+  { 0x00, 0x07 },
+  { 0x00, 0x09 },
+  { 0x00, 0x0a },
+  { 0x00, 0x0c },
+  { 0x00, 0x0d },
+  { 0x00, 0x0f },
+  { 0x00, 0x10 },
+  { 0x00, 0x12 },
+  { 0x00, 0x14 },
+  { 0x00, 0x15 },
+  { 0x00, 0x17 },
+  { 0x00, 0x18 },
+  { 0x00, 0x1a },
+  { 0x00, 0x1b },
+  { 0x00, 0x1d },
+  { 0x00, 0x1e },
+  { 0x00, 0x20 },
+  { 0x00, 0x21 },
+  { 0x00, 0x23 },
+  { 0x00, 0x24 },
+  { 0x00, 0x26 },
+  { 0x00, 0x27 },
+  { 0x00, 0x29 },
+  { 0x00, 0x2a },
+  { 0x00, 0x2c },
+  { 0x00, 0x2d },
+  { 0x00, 0x2f },
+  { 0x00, 0x30 },
+  { 0x00, 0x32 },
+  { 0x00, 0x34 },
+  { 0x00, 0x35 },
+  { 0x00, 0x37 },
+  { 0x00, 0x38 },
+  { 0x00, 0x3a },
+  { 0x00, 0x3b },
+  { 0x00, 0x3d },
+  { 0x00, 0x3e },
+  { 0x00, 0x40 },
+  { 0x00, 0x41 },
+  { 0x00, 0x42 },
+  { 0x00, 0x44 },
+  { 0x00, 0x45 },
+  { 0x00, 0x47 },
+  { 0x00, 0x48 },
+  { 0x00, 0x4a },
+  { 0x00, 0x4b },
+  { 0x00, 0x4d },
+  { 0x00, 0x4e },
+  { 0x00, 0x50 },
+  { 0x00, 0x52 },
+  { 0x00, 0x53 },
+  { 0x00, 0x55 },
+  { 0x00, 0x56 },
+  { 0x00, 0x58 },
+  { 0x00, 0x59 },
+  { 0x00, 0x5b },
+  { 0x00, 0x5c },
+  { 0x00, 0x5e },
+  { 0x00, 0x5f },
+  { 0x00, 0x61 },
+  { 0x00, 0x62 },
+  { 0x00, 0x64 },
+  { 0x00, 0x65 },
+  { 0x00, 0x67 },
+  { 0x00, 0x68 },
+  { 0x00, 0x6a },
+  { 0x00, 0x6b },
+  { 0x00, 0x6d },
+  { 0x00, 0x6e },
+  { 0x00, 0x70 },
+  { 0x00, 0x72 },
+  { 0x00, 0x73 },
+  { 0x00, 0x75 },
+  { 0x00, 0x76 },
+  { 0x00, 0x78 },
+  { 0x00, 0x79 },
+  { 0x00, 0x7b },
+  { 0x00, 0x7c },
+  { 0x00, 0x7e },
+  { 0x00, 0x7f },
+  { 0x01, 0x7f },
+  { 0x02, 0x7e },
+  { 0x03, 0x7e },
+  { 0x03, 0x7f },
+  { 0x04, 0x7f },
+  { 0x05, 0x7e },
+  { 0x06, 0x7e },
+  { 0x06, 0x7f },
+  { 0x07, 0x7f },
+  { 0x08, 0x7e },
+  { 0x09, 0x7e },
+  { 0x09, 0x7f },
+  { 0x0a, 0x7f },
+  { 0x0b, 0x7e },
+  { 0x0c, 0x7e },
+  { 0x0c, 0x7f },
+  { 0x0d, 0x7f },
+  { 0x0e, 0x7e },
+  { 0x0f, 0x7d },
+  { 0x0f, 0x7f },
+  { 0x10, 0x7e },
+  { 0x11, 0x7e },
+  { 0x11, 0x7f },
+  { 0x12, 0x7f },
+  { 0x13, 0x7e },
+  { 0x14, 0x7e },
+  { 0x14, 0x7f },
+  { 0x15, 0x7f },
+  { 0x16, 0x7e },
+  { 0x17, 0x7e },
+  { 0x17, 0x7f },
+  { 0x18, 0x7f },
+  { 0x19, 0x7e },
+  { 0x1a, 0x7e },
+  { 0x1a, 0x7f },
+  { 0x1b, 0x7f },
+  { 0x1c, 0x7e },
+  { 0x1d, 0x7e },
+  { 0x1d, 0x7f },
+  { 0x1e, 0x7f },
+  { 0x1f, 0x7e },
+  { 0x20, 0x7e },
+  { 0x20, 0x7f },
+  { 0x21, 0x7f },
+  { 0x22, 0x7e },
+  { 0x23, 0x7e },
+  { 0x23, 0x7f },
+  { 0x24, 0x7f },
+  { 0x25, 0x7e },
+  { 0x26, 0x7e },
+  { 0x26, 0x7f },
+  { 0x27, 0x7f },
+  { 0x28, 0x7e },
+  { 0x29, 0x7e },
+  { 0x29, 0x7f },
+  { 0x2a, 0x7f },
+  { 0x2b, 0x7e },
+  { 0x2c, 0x7e },
+  { 0x2c, 0x7f },
+  { 0x2d, 0x7f },
+  { 0x2e, 0x7e },
+  { 0x2f, 0x7d },
+  { 0x2f, 0x7f },
+  { 0x30, 0x7e },
+  { 0x31, 0x7e },
+  { 0x31, 0x7f },
+  { 0x32, 0x7f },
+  { 0x33, 0x7e },
+  { 0x34, 0x7e },
+  { 0x34, 0x7f },
+  { 0x35, 0x7f },
+  { 0x36, 0x7e },
+  { 0x37, 0x7e },
+  { 0x37, 0x7f },
+  { 0x38, 0x7f },
+  { 0x39, 0x7e },
+  { 0x3a, 0x7e },
+  { 0x3a, 0x7f },
+  { 0x3b, 0x7f },
+  { 0x3c, 0x7e },
+  { 0x3d, 0x7e },
+  { 0x3d, 0x7f },
+  { 0x3e, 0x7f },
+  { 0x3f, 0x7e },
+  { 0x40, 0x7d },
+  { 0x40, 0x7e },
+  { 0x41, 0x7e },
+  { 0x41, 0x7f },
+  { 0x42, 0x7f },
+  { 0x43, 0x7e },
+  { 0x44, 0x7e },
+  { 0x44, 0x7f },
+  { 0x45, 0x7f },
+  { 0x46, 0x7e },
+  { 0x47, 0x7e },
+  { 0x47, 0x7f },
+  { 0x48, 0x7f },
+  { 0x49, 0x7e },
+  { 0x4a, 0x7e },
+  { 0x4a, 0x7f },
+  { 0x4b, 0x7f },
+  { 0x4c, 0x7e },
+  { 0x4d, 0x7d },
+  { 0x4d, 0x7f },
+  { 0x4e, 0x7e },
+  { 0x4f, 0x7e },
+  { 0x4f, 0x7f },
+  { 0x50, 0x7f },
+  { 0x51, 0x7e },
+  { 0x52, 0x7e },
+  { 0x52, 0x7f },
+  { 0x53, 0x7f },
+  { 0x54, 0x7e },
+  { 0x55, 0x7e },
+  { 0x55, 0x7f },
+  { 0x56, 0x7f },
+  { 0x57, 0x7e },
+  { 0x58, 0x7e },
+  { 0x58, 0x7f },
+  { 0x59, 0x7f },
+  { 0x5a, 0x7e },
+  { 0x5b, 0x7e },
+  { 0x5b, 0x7f },
+  { 0x5c, 0x7f },
+  { 0x5d, 0x7e },
+  { 0x5e, 0x7e },
+  { 0x5e, 0x7f },
+  { 0x5f, 0x7f },
+  { 0x60, 0x7e },
+  { 0x61, 0x7e },
+  { 0x61, 0x7f },
+  { 0x62, 0x7f },
+  { 0x63, 0x7e },
+  { 0x64, 0x7e },
+  { 0x64, 0x7f },
+  { 0x65, 0x7f },
+  { 0x66, 0x7e },
+  { 0x67, 0x7e },
+  { 0x67, 0x7f },
+  { 0x68, 0x7f },
+  { 0x69, 0x7e },
+  { 0x6a, 0x7e },
+  { 0x6a, 0x7f },
+  { 0x6b, 0x7f },
+  { 0x6c, 0x7e },
+  { 0x6d, 0x7d },
+  { 0x6d, 0x7f },
+  { 0x6e, 0x7e },
+  { 0x6f, 0x7e },
+  { 0x6f, 0x7f },
+  { 0x70, 0x7f },
+  { 0x71, 0x7e },
+  { 0x72, 0x7e },
+  { 0x72, 0x7f },
+  { 0x73, 0x7f },
+  { 0x74, 0x7e },
+  { 0x75, 0x7e },
+  { 0x75, 0x7f },
+  { 0x76, 0x7f },
+  { 0x77, 0x7e },
+  { 0x78, 0x7e },
+  { 0x78, 0x7f },
+  { 0x79, 0x7f },
+  { 0x7a, 0x7e },
+  { 0x7b, 0x7e },
+  { 0x7b, 0x7f },
+  { 0x7c, 0x7f },
+  { 0x7d, 0x7e },
+  { 0x7e, 0x7e },
+  { 0x7e, 0x7f },
+  { 0x7f, 0x7f }
 };
 
 // For each value, we give the best possible compression range for that value with 5 bits.
@@ -389,605 +389,605 @@ static unsigned char Optimal7CompressBC7Mode5[256][2] = {
 //
 // The following tables were generated with the following program:
 /*
- 	UINT nbits = 5;
-	UINT lastNum = -1;
-	UINT vals[255];
-	UINT valIdx = 0;
-	for(UINT i = 0; i < 256; i++) {
-		UINT num = (i >> (8 - nbits));
-		num <<= (8-nbits);
-		num |= i >> nbits;
+   UINT nbits = 5;
+  UINT lastNum = -1;
+  UINT vals[255];
+  UINT valIdx = 0;
+  for(UINT i = 0; i < 256; i++) {
+    UINT num = (i >> (8 - nbits));
+    num <<= (8-nbits);
+    num |= i >> nbits;
 
-		if(num != lastNum) {
-			lastNum = num;
-			vals[valIdx++] = num;
-		}
-	}
+    if(num != lastNum) {
+      lastNum = num;
+      vals[valIdx++] = num;
+    }
+  }
 
-	for(UINT i = 0; i < 256; i++) {
+  for(UINT i = 0; i < 256; i++) {
 
-		UINT mindist = 0xFFFFFFFF;
-		UINT minj = 0, mink = 0;
+    UINT mindist = 0xFFFFFFFF;
+    UINT minj = 0, mink = 0;
 
-		UINT tableEntry[2][4] = { {1, 0, 0, 0xFFFFFFFF}, {0, 0, 0, 0xFFFFFFFF} };
+    UINT tableEntry[2][4] = { {1, 0, 0, 0xFFFFFFFF}, {0, 0, 0, 0xFFFFFFFF} };
 
-		for(UINT j = 0; j < valIdx; j++) {
-			for(UINT k = j; k < valIdx ; k++) {
+    for(UINT j = 0; j < valIdx; j++) {
+      for(UINT k = j; k < valIdx ; k++) {
 
-				UINT combo = (vals[j] + vals[k]) / 2;
-				UINT dist = ((i > combo) ? i - combo : combo - i);
-				if( dist < mindist )
-				{
-					mindist = dist;
-					minj = j;
-					mink = k;
-				}
-			}
-		}
+        UINT combo = (vals[j] + vals[k]) / 2;
+        UINT dist = ((i > combo) ? i - combo : combo - i);
+        if( dist < mindist )
+        {
+          mindist = dist;
+          minj = j;
+          mink = k;
+        }
+      }
+    }
 
-		tableEntry[0][1] = vals[minj];
-		tableEntry[0][2] = vals[mink];
-		tableEntry[0][3] = mindist;
+    tableEntry[0][1] = vals[minj];
+    tableEntry[0][2] = vals[mink];
+    tableEntry[0][3] = mindist;
 
-		mindist = 0xFFFFFFFF;
-		minj = 0, mink = 0;
+    mindist = 0xFFFFFFFF;
+    minj = 0, mink = 0;
 
-		for(UINT j = 0; j < valIdx; j++) {
-			for(UINT k = j; k < valIdx ; k++) {
+    for(UINT j = 0; j < valIdx; j++) {
+      for(UINT k = j; k < valIdx ; k++) {
 
-				UINT combo = (2 * vals[j] + vals[k]) / 3;
-				UINT dist = ((i > combo) ? i - combo : combo - i);
-				if( dist < mindist )
-				{
-					mindist = dist;
-					minj = j;
-					mink = k;
-				}
-			}
-		}
+        UINT combo = (2 * vals[j] + vals[k]) / 3;
+        UINT dist = ((i > combo) ? i - combo : combo - i);
+        if( dist < mindist )
+        {
+          mindist = dist;
+          minj = j;
+          mink = k;
+        }
+      }
+    }
 
-		tableEntry[1][1] = vals[minj];
-		tableEntry[1][2] = vals[mink];
-		tableEntry[1][3] = mindist;
+    tableEntry[1][1] = vals[minj];
+    tableEntry[1][2] = vals[mink];
+    tableEntry[1][3] = mindist;
 
-		wchar_t tableEntryStr[256];
-		if(tableEntry[1][3] > tableEntry[0][3]) {
-			swprintf(tableEntryStr, 256, L"{ { %d, 0x%02x, 0x%02x }, { %d, 0x%02x, 0x%02x } },\n", 
-				tableEntry[0][0],
-				tableEntry[0][1] >> (8 - nbits),
-				tableEntry[0][2] >> (8 - nbits),
-				tableEntry[1][0],
-				tableEntry[1][1] >> (8 - nbits),
-				tableEntry[1][2] >> (8 - nbits)
-			);
-		}
-		else {
-			swprintf(tableEntryStr, 256, L"{ { %d, 0x%02x, 0x%02x }, { %d, 0x%02x, 0x%02x } },\n", 
-				tableEntry[1][0],
-				tableEntry[1][1] >> (8 - nbits),
-				tableEntry[1][2] >> (8 - nbits),
-				tableEntry[0][0],
-				tableEntry[0][1] >> (8 - nbits),
-				tableEntry[0][2] >> (8 - nbits)
-			);
-		}
-		OutputDebugString(tableEntryStr);
-	}
+    wchar_t tableEntryStr[256];
+    if(tableEntry[1][3] > tableEntry[0][3]) {
+      swprintf(tableEntryStr, 256, L"{ { %d, 0x%02x, 0x%02x }, { %d, 0x%02x, 0x%02x } },\n", 
+        tableEntry[0][0],
+        tableEntry[0][1] >> (8 - nbits),
+        tableEntry[0][2] >> (8 - nbits),
+        tableEntry[1][0],
+        tableEntry[1][1] >> (8 - nbits),
+        tableEntry[1][2] >> (8 - nbits)
+      );
+    }
+    else {
+      swprintf(tableEntryStr, 256, L"{ { %d, 0x%02x, 0x%02x }, { %d, 0x%02x, 0x%02x } },\n", 
+        tableEntry[1][0],
+        tableEntry[1][1] >> (8 - nbits),
+        tableEntry[1][2] >> (8 - nbits),
+        tableEntry[0][0],
+        tableEntry[0][1] >> (8 - nbits),
+        tableEntry[0][2] >> (8 - nbits)
+      );
+    }
+    OutputDebugString(tableEntryStr);
+  }
 static unsigned char Optimal5CompressDXT1[256][2][3] = {
-	{ { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } },
-	{ { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } },
-	{ { 0, 0x00, 0x01 }, { 1, 0x00, 0x00 } },
-	{ { 0, 0x00, 0x01 }, { 1, 0x00, 0x01 } },
-	{ { 1, 0x00, 0x01 }, { 0, 0x00, 0x02 } },
-	{ { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } },
-	{ { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } },
-	{ { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } },
-	{ { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } },
-	{ { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } },
-	{ { 0, 0x01, 0x02 }, { 1, 0x00, 0x02 } },
-	{ { 0, 0x00, 0x04 }, { 1, 0x00, 0x03 } },
-	{ { 1, 0x00, 0x03 }, { 0, 0x00, 0x04 } },
-	{ { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } },
-	{ { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } },
-	{ { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } },
-	{ { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } },
-	{ { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } },
-	{ { 0, 0x02, 0x03 }, { 1, 0x00, 0x04 } },
-	{ { 0, 0x00, 0x07 }, { 1, 0x00, 0x05 } },
-	{ { 1, 0x00, 0x05 }, { 0, 0x00, 0x07 } },
-	{ { 0, 0x01, 0x06 }, { 1, 0x00, 0x05 } },
-	{ { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } },
-	{ { 0, 0x00, 0x08 }, { 1, 0x00, 0x06 } },
-	{ { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } },
-	{ { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } },
-	{ { 0, 0x00, 0x0a }, { 1, 0x00, 0x06 } },
-	{ { 0, 0x00, 0x0a }, { 1, 0x00, 0x07 } },
-	{ { 1, 0x00, 0x07 }, { 0, 0x00, 0x0a } },
-	{ { 0, 0x02, 0x07 }, { 1, 0x00, 0x07 } },
-	{ { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } },
-	{ { 0, 0x00, 0x0b }, { 1, 0x01, 0x07 } },
-	{ { 0, 0x01, 0x0a }, { 1, 0x01, 0x07 } },
-	{ { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } },
-	{ { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } },
-	{ { 0, 0x00, 0x0d }, { 1, 0x02, 0x07 } },
-	{ { 1, 0x02, 0x07 }, { 0, 0x00, 0x0d } },
-	{ { 1, 0x00, 0x09 }, { 0, 0x00, 0x0e } },
-	{ { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } },
-	{ { 0, 0x00, 0x0e }, { 1, 0x03, 0x07 } },
-	{ { 0, 0x02, 0x0b }, { 1, 0x03, 0x07 } },
-	{ { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } },
-	{ { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } },
-	{ { 0, 0x01, 0x0e }, { 1, 0x00, 0x0a } },
-	{ { 0, 0x00, 0x10 }, { 1, 0x00, 0x0b } },
-	{ { 1, 0x00, 0x0b }, { 0, 0x00, 0x10 } },
-	{ { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } },
-	{ { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } },
-	{ { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } },
-	{ { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } },
-	{ { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } },
-	{ { 0, 0x02, 0x0f }, { 1, 0x00, 0x0c } },
-	{ { 0, 0x00, 0x13 }, { 1, 0x00, 0x0d } },
-	{ { 1, 0x00, 0x0d }, { 0, 0x00, 0x13 } },
-	{ { 0, 0x01, 0x12 }, { 1, 0x00, 0x0d } },
-	{ { 0, 0x00, 0x14 }, { 1, 0x00, 0x0d } },
-	{ { 0, 0x00, 0x14 }, { 1, 0x00, 0x0e } },
-	{ { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } },
-	{ { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } },
-	{ { 0, 0x00, 0x16 }, { 1, 0x00, 0x0e } },
-	{ { 0, 0x00, 0x16 }, { 1, 0x00, 0x0f } },
-	{ { 1, 0x00, 0x0f }, { 0, 0x00, 0x16 } },
-	{ { 0, 0x02, 0x13 }, { 1, 0x00, 0x0f } },
-	{ { 0, 0x00, 0x17 }, { 1, 0x00, 0x0f } },
-	{ { 0, 0x00, 0x17 }, { 1, 0x01, 0x0f } },
-	{ { 0, 0x01, 0x16 }, { 1, 0x01, 0x0f } },
-	{ { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } },
-	{ { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } },
-	{ { 0, 0x00, 0x19 }, { 1, 0x02, 0x0f } },
-	{ { 1, 0x02, 0x0f }, { 0, 0x00, 0x19 } },
-	{ { 1, 0x00, 0x11 }, { 0, 0x00, 0x1a } },
-	{ { 0, 0x00, 0x1a }, { 1, 0x00, 0x11 } },
-	{ { 0, 0x00, 0x1a }, { 1, 0x03, 0x0f } },
-	{ { 0, 0x02, 0x17 }, { 1, 0x03, 0x0f } },
-	{ { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } },
-	{ { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } },
-	{ { 0, 0x01, 0x1a }, { 1, 0x00, 0x12 } },
-	{ { 0, 0x00, 0x1c }, { 1, 0x00, 0x13 } },
-	{ { 1, 0x00, 0x13 }, { 0, 0x00, 0x1c } },
-	{ { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } },
-	{ { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } },
-	{ { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } },
-	{ { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } },
-	{ { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } },
-	{ { 0, 0x02, 0x1b }, { 1, 0x00, 0x14 } },
-	{ { 0, 0x00, 0x1f }, { 1, 0x00, 0x15 } },
-	{ { 1, 0x00, 0x15 }, { 0, 0x00, 0x1f } },
-	{ { 0, 0x01, 0x1e }, { 1, 0x00, 0x15 } },
-	{ { 0, 0x04, 0x18 }, { 1, 0x00, 0x15 } },
-	{ { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } },
-	{ { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } },
-	{ { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } },
-	{ { 0, 0x02, 0x1e }, { 1, 0x00, 0x16 } },
-	{ { 0, 0x02, 0x1e }, { 1, 0x00, 0x17 } },
-	{ { 1, 0x00, 0x17 }, { 0, 0x02, 0x1e } },
-	{ { 0, 0x02, 0x1f }, { 1, 0x00, 0x17 } },
-	{ { 0, 0x04, 0x1b }, { 1, 0x00, 0x17 } },
-	{ { 0, 0x03, 0x1e }, { 1, 0x01, 0x17 } },
-	{ { 0, 0x03, 0x1e }, { 1, 0x01, 0x17 } },
-	{ { 0, 0x04, 0x1c }, { 1, 0x00, 0x18 } },
-	{ { 0, 0x03, 0x1f }, { 1, 0x00, 0x18 } },
-	{ { 0, 0x03, 0x1f }, { 1, 0x02, 0x17 } },
-	{ { 1, 0x02, 0x17 }, { 0, 0x03, 0x1f } },
-	{ { 1, 0x00, 0x19 }, { 0, 0x04, 0x1e } },
-	{ { 0, 0x04, 0x1e }, { 1, 0x00, 0x19 } },
-	{ { 0, 0x04, 0x1e }, { 1, 0x03, 0x17 } },
-	{ { 0, 0x06, 0x1b }, { 1, 0x03, 0x17 } },
-	{ { 0, 0x04, 0x1f }, { 1, 0x00, 0x1a } },
-	{ { 0, 0x04, 0x1f }, { 1, 0x00, 0x1a } },
-	{ { 0, 0x05, 0x1e }, { 1, 0x00, 0x1a } },
-	{ { 0, 0x08, 0x18 }, { 1, 0x00, 0x1b } },
-	{ { 1, 0x00, 0x1b }, { 0, 0x05, 0x1f } },
-	{ { 0, 0x05, 0x1f }, { 1, 0x00, 0x1b } },
-	{ { 0, 0x05, 0x1f }, { 1, 0x00, 0x1b } },
-	{ { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } },
-	{ { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } },
-	{ { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } },
-	{ { 0, 0x06, 0x1f }, { 1, 0x00, 0x1c } },
-	{ { 0, 0x08, 0x1b }, { 1, 0x00, 0x1d } },
-	{ { 1, 0x00, 0x1d }, { 0, 0x07, 0x1e } },
-	{ { 0, 0x07, 0x1e }, { 1, 0x00, 0x1d } },
-	{ { 0, 0x08, 0x1c }, { 1, 0x00, 0x1d } },
-	{ { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } },
-	{ { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } },
-	{ { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } },
-	{ { 0, 0x08, 0x1e }, { 1, 0x00, 0x1e } },
-	{ { 0, 0x08, 0x1e }, { 1, 0x00, 0x1f } },
-	{ { 1, 0x00, 0x1f }, { 0, 0x08, 0x1e } },
-	{ { 0, 0x0a, 0x1b }, { 1, 0x00, 0x1f } },
-	{ { 0, 0x08, 0x1f }, { 1, 0x00, 0x1f } },
-	{ { 0, 0x08, 0x1f }, { 1, 0x01, 0x1f } },
-	{ { 0, 0x09, 0x1e }, { 1, 0x01, 0x1f } },
-	{ { 0, 0x0c, 0x18 }, { 1, 0x04, 0x1c } },
-	{ { 0, 0x09, 0x1f }, { 1, 0x04, 0x1c } },
-	{ { 0, 0x09, 0x1f }, { 1, 0x02, 0x1f } },
-	{ { 1, 0x02, 0x1f }, { 0, 0x09, 0x1f } },
-	{ { 1, 0x04, 0x1d }, { 0, 0x0a, 0x1e } },
-	{ { 0, 0x0a, 0x1e }, { 1, 0x04, 0x1d } },
-	{ { 0, 0x0a, 0x1e }, { 1, 0x03, 0x1f } },
-	{ { 0, 0x0a, 0x1f }, { 1, 0x03, 0x1f } },
-	{ { 0, 0x0c, 0x1b }, { 1, 0x04, 0x1e } },
-	{ { 0, 0x0b, 0x1e }, { 1, 0x04, 0x1e } },
-	{ { 0, 0x0b, 0x1e }, { 1, 0x04, 0x1e } },
-	{ { 0, 0x0c, 0x1c }, { 1, 0x04, 0x1f } },
-	{ { 1, 0x04, 0x1f }, { 0, 0x0b, 0x1f } },
-	{ { 0, 0x0b, 0x1f }, { 1, 0x04, 0x1f } },
-	{ { 0, 0x0b, 0x1f }, { 1, 0x04, 0x1f } },
-	{ { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } },
-	{ { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } },
-	{ { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } },
-	{ { 0, 0x0e, 0x1b }, { 1, 0x05, 0x1f } },
-	{ { 0, 0x0c, 0x1f }, { 1, 0x06, 0x1f } },
-	{ { 1, 0x06, 0x1f }, { 0, 0x0c, 0x1f } },
-	{ { 0, 0x0d, 0x1e }, { 1, 0x06, 0x1f } },
-	{ { 0, 0x10, 0x18 }, { 1, 0x06, 0x1f } },
-	{ { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } },
-	{ { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } },
-	{ { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } },
-	{ { 0, 0x0e, 0x1e }, { 1, 0x07, 0x1f } },
-	{ { 0, 0x0e, 0x1e }, { 1, 0x08, 0x1f } },
-	{ { 1, 0x08, 0x1f }, { 0, 0x0e, 0x1e } },
-	{ { 0, 0x0e, 0x1f }, { 1, 0x08, 0x1f } },
-	{ { 0, 0x10, 0x1b }, { 1, 0x08, 0x1f } },
-	{ { 0, 0x0f, 0x1e }, { 1, 0x09, 0x1f } },
-	{ { 0, 0x0f, 0x1e }, { 1, 0x09, 0x1f } },
-	{ { 0, 0x10, 0x1c }, { 1, 0x0c, 0x1c } },
-	{ { 0, 0x0f, 0x1f }, { 1, 0x0c, 0x1c } },
-	{ { 0, 0x0f, 0x1f }, { 1, 0x0a, 0x1f } },
-	{ { 1, 0x0a, 0x1f }, { 0, 0x0f, 0x1f } },
-	{ { 1, 0x0c, 0x1d }, { 0, 0x10, 0x1e } },
-	{ { 0, 0x10, 0x1e }, { 1, 0x0c, 0x1d } },
-	{ { 0, 0x10, 0x1e }, { 1, 0x0b, 0x1f } },
-	{ { 0, 0x12, 0x1b }, { 1, 0x0b, 0x1f } },
-	{ { 0, 0x10, 0x1f }, { 1, 0x0c, 0x1e } },
-	{ { 0, 0x10, 0x1f }, { 1, 0x0c, 0x1e } },
-	{ { 0, 0x11, 0x1e }, { 1, 0x0c, 0x1e } },
-	{ { 0, 0x14, 0x18 }, { 1, 0x0c, 0x1f } },
-	{ { 1, 0x0c, 0x1f }, { 0, 0x11, 0x1f } },
-	{ { 0, 0x11, 0x1f }, { 1, 0x0c, 0x1f } },
-	{ { 0, 0x11, 0x1f }, { 1, 0x0c, 0x1f } },
-	{ { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } },
-	{ { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } },
-	{ { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } },
-	{ { 0, 0x12, 0x1f }, { 1, 0x0d, 0x1f } },
-	{ { 0, 0x14, 0x1b }, { 1, 0x0e, 0x1f } },
-	{ { 1, 0x0e, 0x1f }, { 0, 0x13, 0x1e } },
-	{ { 0, 0x13, 0x1e }, { 1, 0x0e, 0x1f } },
-	{ { 0, 0x14, 0x1c }, { 1, 0x0e, 0x1f } },
-	{ { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } },
-	{ { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } },
-	{ { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } },
-	{ { 0, 0x14, 0x1e }, { 1, 0x0f, 0x1f } },
-	{ { 0, 0x14, 0x1e }, { 1, 0x10, 0x1f } },
-	{ { 1, 0x10, 0x1f }, { 0, 0x14, 0x1e } },
-	{ { 0, 0x16, 0x1b }, { 1, 0x10, 0x1f } },
-	{ { 0, 0x14, 0x1f }, { 1, 0x10, 0x1f } },
-	{ { 0, 0x14, 0x1f }, { 1, 0x11, 0x1f } },
-	{ { 0, 0x15, 0x1e }, { 1, 0x11, 0x1f } },
-	{ { 0, 0x18, 0x18 }, { 1, 0x14, 0x1c } },
-	{ { 0, 0x15, 0x1f }, { 1, 0x14, 0x1c } },
-	{ { 0, 0x15, 0x1f }, { 1, 0x12, 0x1f } },
-	{ { 1, 0x12, 0x1f }, { 0, 0x15, 0x1f } },
-	{ { 1, 0x14, 0x1d }, { 0, 0x16, 0x1e } },
-	{ { 0, 0x16, 0x1e }, { 1, 0x14, 0x1d } },
-	{ { 0, 0x16, 0x1e }, { 1, 0x13, 0x1f } },
-	{ { 0, 0x16, 0x1f }, { 1, 0x13, 0x1f } },
-	{ { 0, 0x18, 0x1b }, { 1, 0x14, 0x1e } },
-	{ { 0, 0x17, 0x1e }, { 1, 0x14, 0x1e } },
-	{ { 0, 0x17, 0x1e }, { 1, 0x14, 0x1e } },
-	{ { 0, 0x18, 0x1c }, { 1, 0x14, 0x1f } },
-	{ { 1, 0x14, 0x1f }, { 0, 0x17, 0x1f } },
-	{ { 0, 0x17, 0x1f }, { 1, 0x14, 0x1f } },
-	{ { 0, 0x17, 0x1f }, { 1, 0x14, 0x1f } },
-	{ { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } },
-	{ { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } },
-	{ { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } },
-	{ { 0, 0x1a, 0x1b }, { 1, 0x15, 0x1f } },
-	{ { 0, 0x18, 0x1f }, { 1, 0x16, 0x1f } },
-	{ { 1, 0x16, 0x1f }, { 0, 0x18, 0x1f } },
-	{ { 0, 0x19, 0x1e }, { 1, 0x16, 0x1f } },
-	{ { 0, 0x19, 0x1e }, { 1, 0x16, 0x1f } },
-	{ { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } },
-	{ { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } },
-	{ { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } },
-	{ { 0, 0x1a, 0x1e }, { 1, 0x17, 0x1f } },
-	{ { 0, 0x1a, 0x1e }, { 1, 0x18, 0x1f } },
-	{ { 1, 0x18, 0x1f }, { 0, 0x1a, 0x1e } },
-	{ { 0, 0x1a, 0x1f }, { 1, 0x18, 0x1f } },
-	{ { 0, 0x1a, 0x1f }, { 1, 0x18, 0x1f } },
-	{ { 0, 0x1b, 0x1e }, { 1, 0x19, 0x1f } },
-	{ { 0, 0x1b, 0x1e }, { 1, 0x19, 0x1f } },
-	{ { 0, 0x1c, 0x1c }, { 1, 0x1c, 0x1c } },
-	{ { 0, 0x1b, 0x1f }, { 1, 0x1c, 0x1c } },
-	{ { 0, 0x1b, 0x1f }, { 1, 0x1a, 0x1f } },
-	{ { 1, 0x1a, 0x1f }, { 0, 0x1b, 0x1f } },
-	{ { 1, 0x1c, 0x1d }, { 0, 0x1c, 0x1e } },
-	{ { 0, 0x1c, 0x1e }, { 1, 0x1c, 0x1d } },
-	{ { 0, 0x1c, 0x1e }, { 1, 0x1b, 0x1f } },
-	{ { 1, 0x1b, 0x1f }, { 0, 0x1c, 0x1f } },
-	{ { 0, 0x1c, 0x1f }, { 1, 0x1c, 0x1e } },
-	{ { 0, 0x1c, 0x1f }, { 1, 0x1c, 0x1e } },
-	{ { 0, 0x1d, 0x1e }, { 1, 0x1c, 0x1e } },
-	{ { 0, 0x1d, 0x1e }, { 1, 0x1c, 0x1f } },
-	{ { 1, 0x1c, 0x1f }, { 0, 0x1d, 0x1f } },
-	{ { 0, 0x1d, 0x1f }, { 1, 0x1c, 0x1f } },
-	{ { 0, 0x1d, 0x1f }, { 1, 0x1c, 0x1f } },
-	{ { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } },
-	{ { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } },
-	{ { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } },
-	{ { 0, 0x1e, 0x1f }, { 1, 0x1d, 0x1f } },
-	{ { 0, 0x1e, 0x1f }, { 1, 0x1e, 0x1f } },
-	{ { 1, 0x1e, 0x1f }, { 0, 0x1e, 0x1f } },
-	{ { 1, 0x1e, 0x1f }, { 0, 0x1e, 0x1f } },
-	{ { 0, 0x1f, 0x1f }, { 1, 0x1e, 0x1f } },
-	{ { 0, 0x1f, 0x1f }, { 1, 0x1f, 0x1f } },
-	{ { 0, 0x1f, 0x1f }, { 1, 0x1f, 0x1f } }
+  { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } },
+  { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } },
+  { { 0, 0x00, 0x01 }, { 1, 0x00, 0x00 } },
+  { { 0, 0x00, 0x01 }, { 1, 0x00, 0x01 } },
+  { { 1, 0x00, 0x01 }, { 0, 0x00, 0x02 } },
+  { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } },
+  { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } },
+  { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } },
+  { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } },
+  { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } },
+  { { 0, 0x01, 0x02 }, { 1, 0x00, 0x02 } },
+  { { 0, 0x00, 0x04 }, { 1, 0x00, 0x03 } },
+  { { 1, 0x00, 0x03 }, { 0, 0x00, 0x04 } },
+  { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } },
+  { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } },
+  { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } },
+  { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } },
+  { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } },
+  { { 0, 0x02, 0x03 }, { 1, 0x00, 0x04 } },
+  { { 0, 0x00, 0x07 }, { 1, 0x00, 0x05 } },
+  { { 1, 0x00, 0x05 }, { 0, 0x00, 0x07 } },
+  { { 0, 0x01, 0x06 }, { 1, 0x00, 0x05 } },
+  { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } },
+  { { 0, 0x00, 0x08 }, { 1, 0x00, 0x06 } },
+  { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } },
+  { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } },
+  { { 0, 0x00, 0x0a }, { 1, 0x00, 0x06 } },
+  { { 0, 0x00, 0x0a }, { 1, 0x00, 0x07 } },
+  { { 1, 0x00, 0x07 }, { 0, 0x00, 0x0a } },
+  { { 0, 0x02, 0x07 }, { 1, 0x00, 0x07 } },
+  { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } },
+  { { 0, 0x00, 0x0b }, { 1, 0x01, 0x07 } },
+  { { 0, 0x01, 0x0a }, { 1, 0x01, 0x07 } },
+  { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } },
+  { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } },
+  { { 0, 0x00, 0x0d }, { 1, 0x02, 0x07 } },
+  { { 1, 0x02, 0x07 }, { 0, 0x00, 0x0d } },
+  { { 1, 0x00, 0x09 }, { 0, 0x00, 0x0e } },
+  { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } },
+  { { 0, 0x00, 0x0e }, { 1, 0x03, 0x07 } },
+  { { 0, 0x02, 0x0b }, { 1, 0x03, 0x07 } },
+  { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } },
+  { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } },
+  { { 0, 0x01, 0x0e }, { 1, 0x00, 0x0a } },
+  { { 0, 0x00, 0x10 }, { 1, 0x00, 0x0b } },
+  { { 1, 0x00, 0x0b }, { 0, 0x00, 0x10 } },
+  { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } },
+  { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } },
+  { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } },
+  { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } },
+  { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } },
+  { { 0, 0x02, 0x0f }, { 1, 0x00, 0x0c } },
+  { { 0, 0x00, 0x13 }, { 1, 0x00, 0x0d } },
+  { { 1, 0x00, 0x0d }, { 0, 0x00, 0x13 } },
+  { { 0, 0x01, 0x12 }, { 1, 0x00, 0x0d } },
+  { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0d } },
+  { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0e } },
+  { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } },
+  { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } },
+  { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0e } },
+  { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0f } },
+  { { 1, 0x00, 0x0f }, { 0, 0x00, 0x16 } },
+  { { 0, 0x02, 0x13 }, { 1, 0x00, 0x0f } },
+  { { 0, 0x00, 0x17 }, { 1, 0x00, 0x0f } },
+  { { 0, 0x00, 0x17 }, { 1, 0x01, 0x0f } },
+  { { 0, 0x01, 0x16 }, { 1, 0x01, 0x0f } },
+  { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } },
+  { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } },
+  { { 0, 0x00, 0x19 }, { 1, 0x02, 0x0f } },
+  { { 1, 0x02, 0x0f }, { 0, 0x00, 0x19 } },
+  { { 1, 0x00, 0x11 }, { 0, 0x00, 0x1a } },
+  { { 0, 0x00, 0x1a }, { 1, 0x00, 0x11 } },
+  { { 0, 0x00, 0x1a }, { 1, 0x03, 0x0f } },
+  { { 0, 0x02, 0x17 }, { 1, 0x03, 0x0f } },
+  { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } },
+  { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } },
+  { { 0, 0x01, 0x1a }, { 1, 0x00, 0x12 } },
+  { { 0, 0x00, 0x1c }, { 1, 0x00, 0x13 } },
+  { { 1, 0x00, 0x13 }, { 0, 0x00, 0x1c } },
+  { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } },
+  { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } },
+  { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } },
+  { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } },
+  { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } },
+  { { 0, 0x02, 0x1b }, { 1, 0x00, 0x14 } },
+  { { 0, 0x00, 0x1f }, { 1, 0x00, 0x15 } },
+  { { 1, 0x00, 0x15 }, { 0, 0x00, 0x1f } },
+  { { 0, 0x01, 0x1e }, { 1, 0x00, 0x15 } },
+  { { 0, 0x04, 0x18 }, { 1, 0x00, 0x15 } },
+  { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } },
+  { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } },
+  { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } },
+  { { 0, 0x02, 0x1e }, { 1, 0x00, 0x16 } },
+  { { 0, 0x02, 0x1e }, { 1, 0x00, 0x17 } },
+  { { 1, 0x00, 0x17 }, { 0, 0x02, 0x1e } },
+  { { 0, 0x02, 0x1f }, { 1, 0x00, 0x17 } },
+  { { 0, 0x04, 0x1b }, { 1, 0x00, 0x17 } },
+  { { 0, 0x03, 0x1e }, { 1, 0x01, 0x17 } },
+  { { 0, 0x03, 0x1e }, { 1, 0x01, 0x17 } },
+  { { 0, 0x04, 0x1c }, { 1, 0x00, 0x18 } },
+  { { 0, 0x03, 0x1f }, { 1, 0x00, 0x18 } },
+  { { 0, 0x03, 0x1f }, { 1, 0x02, 0x17 } },
+  { { 1, 0x02, 0x17 }, { 0, 0x03, 0x1f } },
+  { { 1, 0x00, 0x19 }, { 0, 0x04, 0x1e } },
+  { { 0, 0x04, 0x1e }, { 1, 0x00, 0x19 } },
+  { { 0, 0x04, 0x1e }, { 1, 0x03, 0x17 } },
+  { { 0, 0x06, 0x1b }, { 1, 0x03, 0x17 } },
+  { { 0, 0x04, 0x1f }, { 1, 0x00, 0x1a } },
+  { { 0, 0x04, 0x1f }, { 1, 0x00, 0x1a } },
+  { { 0, 0x05, 0x1e }, { 1, 0x00, 0x1a } },
+  { { 0, 0x08, 0x18 }, { 1, 0x00, 0x1b } },
+  { { 1, 0x00, 0x1b }, { 0, 0x05, 0x1f } },
+  { { 0, 0x05, 0x1f }, { 1, 0x00, 0x1b } },
+  { { 0, 0x05, 0x1f }, { 1, 0x00, 0x1b } },
+  { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } },
+  { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } },
+  { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } },
+  { { 0, 0x06, 0x1f }, { 1, 0x00, 0x1c } },
+  { { 0, 0x08, 0x1b }, { 1, 0x00, 0x1d } },
+  { { 1, 0x00, 0x1d }, { 0, 0x07, 0x1e } },
+  { { 0, 0x07, 0x1e }, { 1, 0x00, 0x1d } },
+  { { 0, 0x08, 0x1c }, { 1, 0x00, 0x1d } },
+  { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } },
+  { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } },
+  { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } },
+  { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1e } },
+  { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1f } },
+  { { 1, 0x00, 0x1f }, { 0, 0x08, 0x1e } },
+  { { 0, 0x0a, 0x1b }, { 1, 0x00, 0x1f } },
+  { { 0, 0x08, 0x1f }, { 1, 0x00, 0x1f } },
+  { { 0, 0x08, 0x1f }, { 1, 0x01, 0x1f } },
+  { { 0, 0x09, 0x1e }, { 1, 0x01, 0x1f } },
+  { { 0, 0x0c, 0x18 }, { 1, 0x04, 0x1c } },
+  { { 0, 0x09, 0x1f }, { 1, 0x04, 0x1c } },
+  { { 0, 0x09, 0x1f }, { 1, 0x02, 0x1f } },
+  { { 1, 0x02, 0x1f }, { 0, 0x09, 0x1f } },
+  { { 1, 0x04, 0x1d }, { 0, 0x0a, 0x1e } },
+  { { 0, 0x0a, 0x1e }, { 1, 0x04, 0x1d } },
+  { { 0, 0x0a, 0x1e }, { 1, 0x03, 0x1f } },
+  { { 0, 0x0a, 0x1f }, { 1, 0x03, 0x1f } },
+  { { 0, 0x0c, 0x1b }, { 1, 0x04, 0x1e } },
+  { { 0, 0x0b, 0x1e }, { 1, 0x04, 0x1e } },
+  { { 0, 0x0b, 0x1e }, { 1, 0x04, 0x1e } },
+  { { 0, 0x0c, 0x1c }, { 1, 0x04, 0x1f } },
+  { { 1, 0x04, 0x1f }, { 0, 0x0b, 0x1f } },
+  { { 0, 0x0b, 0x1f }, { 1, 0x04, 0x1f } },
+  { { 0, 0x0b, 0x1f }, { 1, 0x04, 0x1f } },
+  { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } },
+  { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } },
+  { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } },
+  { { 0, 0x0e, 0x1b }, { 1, 0x05, 0x1f } },
+  { { 0, 0x0c, 0x1f }, { 1, 0x06, 0x1f } },
+  { { 1, 0x06, 0x1f }, { 0, 0x0c, 0x1f } },
+  { { 0, 0x0d, 0x1e }, { 1, 0x06, 0x1f } },
+  { { 0, 0x10, 0x18 }, { 1, 0x06, 0x1f } },
+  { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } },
+  { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } },
+  { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } },
+  { { 0, 0x0e, 0x1e }, { 1, 0x07, 0x1f } },
+  { { 0, 0x0e, 0x1e }, { 1, 0x08, 0x1f } },
+  { { 1, 0x08, 0x1f }, { 0, 0x0e, 0x1e } },
+  { { 0, 0x0e, 0x1f }, { 1, 0x08, 0x1f } },
+  { { 0, 0x10, 0x1b }, { 1, 0x08, 0x1f } },
+  { { 0, 0x0f, 0x1e }, { 1, 0x09, 0x1f } },
+  { { 0, 0x0f, 0x1e }, { 1, 0x09, 0x1f } },
+  { { 0, 0x10, 0x1c }, { 1, 0x0c, 0x1c } },
+  { { 0, 0x0f, 0x1f }, { 1, 0x0c, 0x1c } },
+  { { 0, 0x0f, 0x1f }, { 1, 0x0a, 0x1f } },
+  { { 1, 0x0a, 0x1f }, { 0, 0x0f, 0x1f } },
+  { { 1, 0x0c, 0x1d }, { 0, 0x10, 0x1e } },
+  { { 0, 0x10, 0x1e }, { 1, 0x0c, 0x1d } },
+  { { 0, 0x10, 0x1e }, { 1, 0x0b, 0x1f } },
+  { { 0, 0x12, 0x1b }, { 1, 0x0b, 0x1f } },
+  { { 0, 0x10, 0x1f }, { 1, 0x0c, 0x1e } },
+  { { 0, 0x10, 0x1f }, { 1, 0x0c, 0x1e } },
+  { { 0, 0x11, 0x1e }, { 1, 0x0c, 0x1e } },
+  { { 0, 0x14, 0x18 }, { 1, 0x0c, 0x1f } },
+  { { 1, 0x0c, 0x1f }, { 0, 0x11, 0x1f } },
+  { { 0, 0x11, 0x1f }, { 1, 0x0c, 0x1f } },
+  { { 0, 0x11, 0x1f }, { 1, 0x0c, 0x1f } },
+  { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } },
+  { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } },
+  { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } },
+  { { 0, 0x12, 0x1f }, { 1, 0x0d, 0x1f } },
+  { { 0, 0x14, 0x1b }, { 1, 0x0e, 0x1f } },
+  { { 1, 0x0e, 0x1f }, { 0, 0x13, 0x1e } },
+  { { 0, 0x13, 0x1e }, { 1, 0x0e, 0x1f } },
+  { { 0, 0x14, 0x1c }, { 1, 0x0e, 0x1f } },
+  { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } },
+  { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } },
+  { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } },
+  { { 0, 0x14, 0x1e }, { 1, 0x0f, 0x1f } },
+  { { 0, 0x14, 0x1e }, { 1, 0x10, 0x1f } },
+  { { 1, 0x10, 0x1f }, { 0, 0x14, 0x1e } },
+  { { 0, 0x16, 0x1b }, { 1, 0x10, 0x1f } },
+  { { 0, 0x14, 0x1f }, { 1, 0x10, 0x1f } },
+  { { 0, 0x14, 0x1f }, { 1, 0x11, 0x1f } },
+  { { 0, 0x15, 0x1e }, { 1, 0x11, 0x1f } },
+  { { 0, 0x18, 0x18 }, { 1, 0x14, 0x1c } },
+  { { 0, 0x15, 0x1f }, { 1, 0x14, 0x1c } },
+  { { 0, 0x15, 0x1f }, { 1, 0x12, 0x1f } },
+  { { 1, 0x12, 0x1f }, { 0, 0x15, 0x1f } },
+  { { 1, 0x14, 0x1d }, { 0, 0x16, 0x1e } },
+  { { 0, 0x16, 0x1e }, { 1, 0x14, 0x1d } },
+  { { 0, 0x16, 0x1e }, { 1, 0x13, 0x1f } },
+  { { 0, 0x16, 0x1f }, { 1, 0x13, 0x1f } },
+  { { 0, 0x18, 0x1b }, { 1, 0x14, 0x1e } },
+  { { 0, 0x17, 0x1e }, { 1, 0x14, 0x1e } },
+  { { 0, 0x17, 0x1e }, { 1, 0x14, 0x1e } },
+  { { 0, 0x18, 0x1c }, { 1, 0x14, 0x1f } },
+  { { 1, 0x14, 0x1f }, { 0, 0x17, 0x1f } },
+  { { 0, 0x17, 0x1f }, { 1, 0x14, 0x1f } },
+  { { 0, 0x17, 0x1f }, { 1, 0x14, 0x1f } },
+  { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } },
+  { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } },
+  { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } },
+  { { 0, 0x1a, 0x1b }, { 1, 0x15, 0x1f } },
+  { { 0, 0x18, 0x1f }, { 1, 0x16, 0x1f } },
+  { { 1, 0x16, 0x1f }, { 0, 0x18, 0x1f } },
+  { { 0, 0x19, 0x1e }, { 1, 0x16, 0x1f } },
+  { { 0, 0x19, 0x1e }, { 1, 0x16, 0x1f } },
+  { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } },
+  { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } },
+  { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } },
+  { { 0, 0x1a, 0x1e }, { 1, 0x17, 0x1f } },
+  { { 0, 0x1a, 0x1e }, { 1, 0x18, 0x1f } },
+  { { 1, 0x18, 0x1f }, { 0, 0x1a, 0x1e } },
+  { { 0, 0x1a, 0x1f }, { 1, 0x18, 0x1f } },
+  { { 0, 0x1a, 0x1f }, { 1, 0x18, 0x1f } },
+  { { 0, 0x1b, 0x1e }, { 1, 0x19, 0x1f } },
+  { { 0, 0x1b, 0x1e }, { 1, 0x19, 0x1f } },
+  { { 0, 0x1c, 0x1c }, { 1, 0x1c, 0x1c } },
+  { { 0, 0x1b, 0x1f }, { 1, 0x1c, 0x1c } },
+  { { 0, 0x1b, 0x1f }, { 1, 0x1a, 0x1f } },
+  { { 1, 0x1a, 0x1f }, { 0, 0x1b, 0x1f } },
+  { { 1, 0x1c, 0x1d }, { 0, 0x1c, 0x1e } },
+  { { 0, 0x1c, 0x1e }, { 1, 0x1c, 0x1d } },
+  { { 0, 0x1c, 0x1e }, { 1, 0x1b, 0x1f } },
+  { { 1, 0x1b, 0x1f }, { 0, 0x1c, 0x1f } },
+  { { 0, 0x1c, 0x1f }, { 1, 0x1c, 0x1e } },
+  { { 0, 0x1c, 0x1f }, { 1, 0x1c, 0x1e } },
+  { { 0, 0x1d, 0x1e }, { 1, 0x1c, 0x1e } },
+  { { 0, 0x1d, 0x1e }, { 1, 0x1c, 0x1f } },
+  { { 1, 0x1c, 0x1f }, { 0, 0x1d, 0x1f } },
+  { { 0, 0x1d, 0x1f }, { 1, 0x1c, 0x1f } },
+  { { 0, 0x1d, 0x1f }, { 1, 0x1c, 0x1f } },
+  { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } },
+  { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } },
+  { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } },
+  { { 0, 0x1e, 0x1f }, { 1, 0x1d, 0x1f } },
+  { { 0, 0x1e, 0x1f }, { 1, 0x1e, 0x1f } },
+  { { 1, 0x1e, 0x1f }, { 0, 0x1e, 0x1f } },
+  { { 1, 0x1e, 0x1f }, { 0, 0x1e, 0x1f } },
+  { { 0, 0x1f, 0x1f }, { 1, 0x1e, 0x1f } },
+  { { 0, 0x1f, 0x1f }, { 1, 0x1f, 0x1f } },
+  { { 0, 0x1f, 0x1f }, { 1, 0x1f, 0x1f } }
 };
 */
 
 static unsigned char Optimal6CompressDXT1[256][2][3] = {
-	{ { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } },
-	{ { 0, 0x00, 0x01 }, { 1, 0x00, 0x00 } },
-	{ { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } },
-	{ { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } },
-	{ { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } },
-	{ { 0, 0x00, 0x04 }, { 1, 0x00, 0x02 } },
-	{ { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } },
-	{ { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } },
-	{ { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } },
-	{ { 0, 0x00, 0x07 }, { 1, 0x00, 0x04 } },
-	{ { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } },
-	{ { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } },
-	{ { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } },
-	{ { 0, 0x00, 0x0a }, { 1, 0x00, 0x06 } },
-	{ { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } },
-	{ { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } },
-	{ { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } },
-	{ { 0, 0x00, 0x0d }, { 1, 0x00, 0x08 } },
-	{ { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } },
-	{ { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } },
-	{ { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } },
-	{ { 0, 0x00, 0x10 }, { 1, 0x00, 0x0a } },
-	{ { 0, 0x01, 0x0f }, { 1, 0x00, 0x0b } },
-	{ { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } },
-	{ { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } },
-	{ { 0, 0x00, 0x13 }, { 1, 0x00, 0x0c } },
-	{ { 0, 0x03, 0x0e }, { 1, 0x00, 0x0d } },
-	{ { 0, 0x00, 0x14 }, { 1, 0x00, 0x0d } },
-	{ { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } },
-	{ { 0, 0x00, 0x16 }, { 1, 0x00, 0x0e } },
-	{ { 0, 0x04, 0x0f }, { 1, 0x00, 0x0f } },
-	{ { 0, 0x00, 0x17 }, { 1, 0x00, 0x0f } },
-	{ { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } },
-	{ { 0, 0x00, 0x19 }, { 1, 0x00, 0x10 } },
-	{ { 0, 0x06, 0x0e }, { 1, 0x00, 0x11 } },
-	{ { 0, 0x00, 0x1a }, { 1, 0x00, 0x11 } },
-	{ { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } },
-	{ { 0, 0x00, 0x1c }, { 1, 0x00, 0x12 } },
-	{ { 0, 0x07, 0x0f }, { 1, 0x00, 0x13 } },
-	{ { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } },
-	{ { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } },
-	{ { 0, 0x00, 0x1f }, { 1, 0x00, 0x14 } },
-	{ { 0, 0x09, 0x0e }, { 1, 0x00, 0x15 } },
-	{ { 0, 0x00, 0x20 }, { 1, 0x00, 0x15 } },
-	{ { 0, 0x00, 0x21 }, { 1, 0x00, 0x16 } },
-	{ { 0, 0x02, 0x1e }, { 1, 0x00, 0x16 } },
-	{ { 0, 0x00, 0x22 }, { 1, 0x00, 0x17 } },
-	{ { 0, 0x00, 0x23 }, { 1, 0x00, 0x17 } },
-	{ { 0, 0x00, 0x24 }, { 1, 0x00, 0x18 } },
-	{ { 0, 0x03, 0x1f }, { 1, 0x00, 0x18 } },
-	{ { 0, 0x00, 0x25 }, { 1, 0x00, 0x19 } },
-	{ { 0, 0x00, 0x26 }, { 1, 0x00, 0x19 } },
-	{ { 0, 0x00, 0x27 }, { 1, 0x00, 0x1a } },
-	{ { 0, 0x05, 0x1e }, { 1, 0x00, 0x1a } },
-	{ { 0, 0x00, 0x28 }, { 1, 0x00, 0x1b } },
-	{ { 0, 0x00, 0x29 }, { 1, 0x00, 0x1b } },
-	{ { 0, 0x00, 0x2a }, { 1, 0x00, 0x1c } },
-	{ { 0, 0x06, 0x1f }, { 1, 0x00, 0x1c } },
-	{ { 0, 0x00, 0x2b }, { 1, 0x00, 0x1d } },
-	{ { 0, 0x00, 0x2c }, { 1, 0x00, 0x1d } },
-	{ { 0, 0x00, 0x2d }, { 1, 0x00, 0x1e } },
-	{ { 0, 0x08, 0x1e }, { 1, 0x00, 0x1e } },
-	{ { 0, 0x00, 0x2e }, { 1, 0x00, 0x1f } },
-	{ { 0, 0x00, 0x2f }, { 1, 0x00, 0x1f } },
-	{ { 0, 0x01, 0x2e }, { 1, 0x01, 0x1f } },
-	{ { 0, 0x00, 0x30 }, { 1, 0x00, 0x20 } },
-	{ { 0, 0x00, 0x31 }, { 1, 0x02, 0x1f } },
-	{ { 0, 0x00, 0x32 }, { 1, 0x00, 0x21 } },
-	{ { 0, 0x02, 0x2f }, { 1, 0x03, 0x1f } },
-	{ { 0, 0x00, 0x33 }, { 1, 0x00, 0x22 } },
-	{ { 0, 0x00, 0x34 }, { 1, 0x04, 0x1f } },
-	{ { 0, 0x00, 0x35 }, { 1, 0x00, 0x23 } },
-	{ { 0, 0x04, 0x2e }, { 1, 0x05, 0x1f } },
-	{ { 0, 0x00, 0x36 }, { 1, 0x00, 0x24 } },
-	{ { 0, 0x00, 0x37 }, { 1, 0x06, 0x1f } },
-	{ { 0, 0x00, 0x38 }, { 1, 0x00, 0x25 } },
-	{ { 0, 0x05, 0x2f }, { 1, 0x07, 0x1f } },
-	{ { 0, 0x00, 0x39 }, { 1, 0x00, 0x26 } },
-	{ { 0, 0x00, 0x3a }, { 1, 0x08, 0x1f } },
-	{ { 0, 0x00, 0x3b }, { 1, 0x00, 0x27 } },
-	{ { 0, 0x07, 0x2e }, { 1, 0x09, 0x1f } },
-	{ { 0, 0x00, 0x3c }, { 1, 0x00, 0x28 } },
-	{ { 0, 0x00, 0x3d }, { 1, 0x0a, 0x1f } },
-	{ { 0, 0x00, 0x3e }, { 1, 0x00, 0x29 } },
-	{ { 0, 0x08, 0x2f }, { 1, 0x0b, 0x1f } },
-	{ { 0, 0x00, 0x3f }, { 1, 0x00, 0x2a } },
-	{ { 0, 0x01, 0x3e }, { 1, 0x0c, 0x1f } },
-	{ { 0, 0x01, 0x3f }, { 1, 0x00, 0x2b } },
-	{ { 0, 0x0a, 0x2e }, { 1, 0x0d, 0x1f } },
-	{ { 0, 0x02, 0x3e }, { 1, 0x00, 0x2c } },
-	{ { 0, 0x02, 0x3f }, { 1, 0x0e, 0x1f } },
-	{ { 0, 0x03, 0x3e }, { 1, 0x00, 0x2d } },
-	{ { 0, 0x0b, 0x2f }, { 1, 0x0f, 0x1f } },
-	{ { 0, 0x03, 0x3f }, { 1, 0x00, 0x2e } },
-	{ { 0, 0x04, 0x3e }, { 1, 0x00, 0x2e } },
-	{ { 0, 0x04, 0x3f }, { 1, 0x00, 0x2f } },
-	{ { 0, 0x0d, 0x2e }, { 1, 0x00, 0x2f } },
-	{ { 0, 0x05, 0x3e }, { 1, 0x00, 0x30 } },
-	{ { 0, 0x05, 0x3f }, { 1, 0x00, 0x30 } },
-	{ { 0, 0x06, 0x3e }, { 1, 0x00, 0x31 } },
-	{ { 0, 0x0e, 0x2f }, { 1, 0x00, 0x31 } },
-	{ { 0, 0x06, 0x3f }, { 1, 0x00, 0x32 } },
-	{ { 0, 0x07, 0x3e }, { 1, 0x00, 0x32 } },
-	{ { 0, 0x07, 0x3f }, { 1, 0x00, 0x33 } },
-	{ { 0, 0x10, 0x2d }, { 1, 0x00, 0x33 } },
-	{ { 0, 0x08, 0x3e }, { 1, 0x00, 0x34 } },
-	{ { 0, 0x08, 0x3f }, { 1, 0x00, 0x34 } },
-	{ { 0, 0x09, 0x3e }, { 1, 0x00, 0x35 } },
-	{ { 0, 0x10, 0x30 }, { 1, 0x00, 0x35 } },
-	{ { 0, 0x09, 0x3f }, { 1, 0x00, 0x36 } },
-	{ { 0, 0x0a, 0x3e }, { 1, 0x00, 0x36 } },
-	{ { 0, 0x0a, 0x3f }, { 1, 0x00, 0x37 } },
-	{ { 0, 0x10, 0x33 }, { 1, 0x00, 0x37 } },
-	{ { 0, 0x0b, 0x3e }, { 1, 0x00, 0x38 } },
-	{ { 0, 0x0b, 0x3f }, { 1, 0x00, 0x38 } },
-	{ { 0, 0x0c, 0x3e }, { 1, 0x00, 0x39 } },
-	{ { 0, 0x10, 0x36 }, { 1, 0x00, 0x39 } },
-	{ { 0, 0x0c, 0x3f }, { 1, 0x00, 0x3a } },
-	{ { 0, 0x0d, 0x3e }, { 1, 0x00, 0x3a } },
-	{ { 0, 0x0d, 0x3f }, { 1, 0x00, 0x3b } },
-	{ { 0, 0x10, 0x39 }, { 1, 0x00, 0x3b } },
-	{ { 0, 0x0e, 0x3e }, { 1, 0x00, 0x3c } },
-	{ { 0, 0x0e, 0x3f }, { 1, 0x00, 0x3c } },
-	{ { 0, 0x0f, 0x3e }, { 1, 0x00, 0x3d } },
-	{ { 0, 0x10, 0x3c }, { 1, 0x00, 0x3d } },
-	{ { 0, 0x0f, 0x3f }, { 1, 0x00, 0x3e } },
-	{ { 0, 0x18, 0x2e }, { 1, 0x00, 0x3e } },
-	{ { 0, 0x10, 0x3e }, { 1, 0x00, 0x3f } },
-	{ { 0, 0x10, 0x3f }, { 1, 0x00, 0x3f } },
-	{ { 0, 0x11, 0x3e }, { 1, 0x01, 0x3f } },
-	{ { 0, 0x19, 0x2f }, { 1, 0x10, 0x30 } },
-	{ { 0, 0x11, 0x3f }, { 1, 0x02, 0x3f } },
-	{ { 0, 0x12, 0x3e }, { 1, 0x10, 0x31 } },
-	{ { 0, 0x12, 0x3f }, { 1, 0x03, 0x3f } },
-	{ { 0, 0x1b, 0x2e }, { 1, 0x10, 0x32 } },
-	{ { 0, 0x13, 0x3e }, { 1, 0x04, 0x3f } },
-	{ { 0, 0x13, 0x3f }, { 1, 0x10, 0x33 } },
-	{ { 0, 0x14, 0x3e }, { 1, 0x05, 0x3f } },
-	{ { 0, 0x1c, 0x2f }, { 1, 0x10, 0x34 } },
-	{ { 0, 0x14, 0x3f }, { 1, 0x06, 0x3f } },
-	{ { 0, 0x15, 0x3e }, { 1, 0x10, 0x35 } },
-	{ { 0, 0x15, 0x3f }, { 1, 0x07, 0x3f } },
-	{ { 0, 0x1e, 0x2e }, { 1, 0x10, 0x36 } },
-	{ { 0, 0x16, 0x3e }, { 1, 0x08, 0x3f } },
-	{ { 0, 0x16, 0x3f }, { 1, 0x10, 0x37 } },
-	{ { 0, 0x17, 0x3e }, { 1, 0x09, 0x3f } },
-	{ { 0, 0x1f, 0x2f }, { 1, 0x10, 0x38 } },
-	{ { 0, 0x17, 0x3f }, { 1, 0x0a, 0x3f } },
-	{ { 0, 0x18, 0x3e }, { 1, 0x10, 0x39 } },
-	{ { 0, 0x18, 0x3f }, { 1, 0x0b, 0x3f } },
-	{ { 0, 0x20, 0x2f }, { 1, 0x10, 0x3a } },
-	{ { 0, 0x19, 0x3e }, { 1, 0x0c, 0x3f } },
-	{ { 0, 0x19, 0x3f }, { 1, 0x10, 0x3b } },
-	{ { 0, 0x1a, 0x3e }, { 1, 0x0d, 0x3f } },
-	{ { 0, 0x20, 0x32 }, { 1, 0x10, 0x3c } },
-	{ { 0, 0x1a, 0x3f }, { 1, 0x0e, 0x3f } },
-	{ { 0, 0x1b, 0x3e }, { 1, 0x10, 0x3d } },
-	{ { 0, 0x1b, 0x3f }, { 1, 0x0f, 0x3f } },
-	{ { 0, 0x20, 0x35 }, { 1, 0x10, 0x3e } },
-	{ { 0, 0x1c, 0x3e }, { 1, 0x10, 0x3e } },
-	{ { 0, 0x1c, 0x3f }, { 1, 0x10, 0x3f } },
-	{ { 0, 0x1d, 0x3e }, { 1, 0x10, 0x3f } },
-	{ { 0, 0x20, 0x38 }, { 1, 0x11, 0x3f } },
-	{ { 0, 0x1d, 0x3f }, { 1, 0x11, 0x3f } },
-	{ { 0, 0x1e, 0x3e }, { 1, 0x12, 0x3f } },
-	{ { 0, 0x1e, 0x3f }, { 1, 0x12, 0x3f } },
-	{ { 0, 0x20, 0x3b }, { 1, 0x13, 0x3f } },
-	{ { 0, 0x1f, 0x3e }, { 1, 0x13, 0x3f } },
-	{ { 0, 0x1f, 0x3f }, { 1, 0x14, 0x3f } },
-	{ { 0, 0x20, 0x3d }, { 1, 0x14, 0x3f } },
-	{ { 0, 0x20, 0x3e }, { 1, 0x15, 0x3f } },
-	{ { 0, 0x20, 0x3f }, { 1, 0x15, 0x3f } },
-	{ { 0, 0x29, 0x2e }, { 1, 0x16, 0x3f } },
-	{ { 0, 0x21, 0x3e }, { 1, 0x16, 0x3f } },
-	{ { 0, 0x21, 0x3f }, { 1, 0x17, 0x3f } },
-	{ { 0, 0x22, 0x3e }, { 1, 0x17, 0x3f } },
-	{ { 0, 0x2a, 0x2f }, { 1, 0x18, 0x3f } },
-	{ { 0, 0x22, 0x3f }, { 1, 0x18, 0x3f } },
-	{ { 0, 0x23, 0x3e }, { 1, 0x19, 0x3f } },
-	{ { 0, 0x23, 0x3f }, { 1, 0x19, 0x3f } },
-	{ { 0, 0x2c, 0x2e }, { 1, 0x1a, 0x3f } },
-	{ { 0, 0x24, 0x3e }, { 1, 0x1a, 0x3f } },
-	{ { 0, 0x24, 0x3f }, { 1, 0x1b, 0x3f } },
-	{ { 0, 0x25, 0x3e }, { 1, 0x1b, 0x3f } },
-	{ { 0, 0x2d, 0x2f }, { 1, 0x1c, 0x3f } },
-	{ { 0, 0x25, 0x3f }, { 1, 0x1c, 0x3f } },
-	{ { 0, 0x26, 0x3e }, { 1, 0x1d, 0x3f } },
-	{ { 0, 0x26, 0x3f }, { 1, 0x1d, 0x3f } },
-	{ { 1, 0x1e, 0x3f }, { 0, 0x26, 0x3f } },
-	{ { 0, 0x27, 0x3e }, { 1, 0x1e, 0x3f } },
-	{ { 0, 0x27, 0x3f }, { 1, 0x1f, 0x3f } },
-	{ { 0, 0x28, 0x3e }, { 1, 0x1f, 0x3f } },
-	{ { 1, 0x20, 0x3f }, { 0, 0x28, 0x3e } },
-	{ { 0, 0x28, 0x3f }, { 1, 0x20, 0x3f } },
-	{ { 0, 0x29, 0x3e }, { 1, 0x21, 0x3f } },
-	{ { 0, 0x29, 0x3f }, { 1, 0x30, 0x30 } },
-	{ { 0, 0x30, 0x31 }, { 1, 0x22, 0x3f } },
-	{ { 0, 0x2a, 0x3e }, { 1, 0x30, 0x31 } },
-	{ { 0, 0x2a, 0x3f }, { 1, 0x23, 0x3f } },
-	{ { 0, 0x2b, 0x3e }, { 1, 0x30, 0x32 } },
-	{ { 0, 0x30, 0x34 }, { 1, 0x24, 0x3f } },
-	{ { 0, 0x2b, 0x3f }, { 1, 0x30, 0x33 } },
-	{ { 0, 0x2c, 0x3e }, { 1, 0x25, 0x3f } },
-	{ { 0, 0x2c, 0x3f }, { 1, 0x30, 0x34 } },
-	{ { 0, 0x30, 0x37 }, { 1, 0x26, 0x3f } },
-	{ { 0, 0x2d, 0x3e }, { 1, 0x30, 0x35 } },
-	{ { 0, 0x2d, 0x3f }, { 1, 0x27, 0x3f } },
-	{ { 0, 0x2e, 0x3e }, { 1, 0x30, 0x36 } },
-	{ { 0, 0x30, 0x3a }, { 1, 0x28, 0x3f } },
-	{ { 0, 0x2e, 0x3f }, { 1, 0x30, 0x37 } },
-	{ { 0, 0x2f, 0x3e }, { 1, 0x29, 0x3f } },
-	{ { 0, 0x2f, 0x3f }, { 1, 0x30, 0x38 } },
-	{ { 0, 0x30, 0x3d }, { 1, 0x2a, 0x3f } },
-	{ { 0, 0x30, 0x3e }, { 1, 0x30, 0x39 } },
-	{ { 1, 0x2b, 0x3f }, { 0, 0x30, 0x3e } },
-	{ { 0, 0x30, 0x3f }, { 1, 0x30, 0x3a } },
-	{ { 0, 0x31, 0x3e }, { 1, 0x2c, 0x3f } },
-	{ { 0, 0x31, 0x3f }, { 1, 0x30, 0x3b } },
-	{ { 1, 0x2d, 0x3f }, { 0, 0x31, 0x3f } },
-	{ { 0, 0x32, 0x3e }, { 1, 0x30, 0x3c } },
-	{ { 0, 0x32, 0x3f }, { 1, 0x2e, 0x3f } },
-	{ { 0, 0x33, 0x3e }, { 1, 0x30, 0x3d } },
-	{ { 1, 0x2f, 0x3f }, { 0, 0x33, 0x3e } },
-	{ { 0, 0x33, 0x3f }, { 1, 0x30, 0x3e } },
-	{ { 0, 0x34, 0x3e }, { 1, 0x30, 0x3e } },
-	{ { 0, 0x34, 0x3f }, { 1, 0x30, 0x3f } },
-	{ { 0, 0x34, 0x3f }, { 1, 0x30, 0x3f } },
-	{ { 0, 0x35, 0x3e }, { 1, 0x31, 0x3f } },
-	{ { 0, 0x35, 0x3f }, { 1, 0x31, 0x3f } },
-	{ { 0, 0x36, 0x3e }, { 1, 0x32, 0x3f } },
-	{ { 0, 0x36, 0x3e }, { 1, 0x32, 0x3f } },
-	{ { 0, 0x36, 0x3f }, { 1, 0x33, 0x3f } },
-	{ { 0, 0x37, 0x3e }, { 1, 0x33, 0x3f } },
-	{ { 0, 0x37, 0x3f }, { 1, 0x34, 0x3f } },
-	{ { 0, 0x37, 0x3f }, { 1, 0x34, 0x3f } },
-	{ { 0, 0x38, 0x3e }, { 1, 0x35, 0x3f } },
-	{ { 0, 0x38, 0x3f }, { 1, 0x35, 0x3f } },
-	{ { 0, 0x39, 0x3e }, { 1, 0x36, 0x3f } },
-	{ { 0, 0x39, 0x3e }, { 1, 0x36, 0x3f } },
-	{ { 0, 0x39, 0x3f }, { 1, 0x37, 0x3f } },
-	{ { 0, 0x3a, 0x3e }, { 1, 0x37, 0x3f } },
-	{ { 0, 0x3a, 0x3f }, { 1, 0x38, 0x3f } },
-	{ { 0, 0x3a, 0x3f }, { 1, 0x38, 0x3f } },
-	{ { 0, 0x3b, 0x3e }, { 1, 0x39, 0x3f } },
-	{ { 0, 0x3b, 0x3f }, { 1, 0x39, 0x3f } },
-	{ { 0, 0x3c, 0x3e }, { 1, 0x3a, 0x3f } },
-	{ { 0, 0x3c, 0x3e }, { 1, 0x3a, 0x3f } },
-	{ { 0, 0x3c, 0x3f }, { 1, 0x3b, 0x3f } },
-	{ { 0, 0x3d, 0x3e }, { 1, 0x3b, 0x3f } },
-	{ { 0, 0x3d, 0x3f }, { 1, 0x3c, 0x3f } },
-	{ { 0, 0x3d, 0x3f }, { 1, 0x3c, 0x3f } },
-	{ { 0, 0x3e, 0x3e }, { 1, 0x3d, 0x3f } },
-	{ { 0, 0x3e, 0x3f }, { 1, 0x3d, 0x3f } },
-	{ { 1, 0x3e, 0x3f }, { 0, 0x3e, 0x3f } },
-	{ { 0, 0x3f, 0x3f }, { 1, 0x3e, 0x3f } },
-	{ { 0, 0x3f, 0x3f }, { 1, 0x3f, 0x3f } }
+  { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } },
+  { { 0, 0x00, 0x01 }, { 1, 0x00, 0x00 } },
+  { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } },
+  { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } },
+  { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } },
+  { { 0, 0x00, 0x04 }, { 1, 0x00, 0x02 } },
+  { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } },
+  { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } },
+  { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } },
+  { { 0, 0x00, 0x07 }, { 1, 0x00, 0x04 } },
+  { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } },
+  { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } },
+  { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } },
+  { { 0, 0x00, 0x0a }, { 1, 0x00, 0x06 } },
+  { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } },
+  { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } },
+  { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } },
+  { { 0, 0x00, 0x0d }, { 1, 0x00, 0x08 } },
+  { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } },
+  { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } },
+  { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } },
+  { { 0, 0x00, 0x10 }, { 1, 0x00, 0x0a } },
+  { { 0, 0x01, 0x0f }, { 1, 0x00, 0x0b } },
+  { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } },
+  { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } },
+  { { 0, 0x00, 0x13 }, { 1, 0x00, 0x0c } },
+  { { 0, 0x03, 0x0e }, { 1, 0x00, 0x0d } },
+  { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0d } },
+  { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } },
+  { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0e } },
+  { { 0, 0x04, 0x0f }, { 1, 0x00, 0x0f } },
+  { { 0, 0x00, 0x17 }, { 1, 0x00, 0x0f } },
+  { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } },
+  { { 0, 0x00, 0x19 }, { 1, 0x00, 0x10 } },
+  { { 0, 0x06, 0x0e }, { 1, 0x00, 0x11 } },
+  { { 0, 0x00, 0x1a }, { 1, 0x00, 0x11 } },
+  { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } },
+  { { 0, 0x00, 0x1c }, { 1, 0x00, 0x12 } },
+  { { 0, 0x07, 0x0f }, { 1, 0x00, 0x13 } },
+  { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } },
+  { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } },
+  { { 0, 0x00, 0x1f }, { 1, 0x00, 0x14 } },
+  { { 0, 0x09, 0x0e }, { 1, 0x00, 0x15 } },
+  { { 0, 0x00, 0x20 }, { 1, 0x00, 0x15 } },
+  { { 0, 0x00, 0x21 }, { 1, 0x00, 0x16 } },
+  { { 0, 0x02, 0x1e }, { 1, 0x00, 0x16 } },
+  { { 0, 0x00, 0x22 }, { 1, 0x00, 0x17 } },
+  { { 0, 0x00, 0x23 }, { 1, 0x00, 0x17 } },
+  { { 0, 0x00, 0x24 }, { 1, 0x00, 0x18 } },
+  { { 0, 0x03, 0x1f }, { 1, 0x00, 0x18 } },
+  { { 0, 0x00, 0x25 }, { 1, 0x00, 0x19 } },
+  { { 0, 0x00, 0x26 }, { 1, 0x00, 0x19 } },
+  { { 0, 0x00, 0x27 }, { 1, 0x00, 0x1a } },
+  { { 0, 0x05, 0x1e }, { 1, 0x00, 0x1a } },
+  { { 0, 0x00, 0x28 }, { 1, 0x00, 0x1b } },
+  { { 0, 0x00, 0x29 }, { 1, 0x00, 0x1b } },
+  { { 0, 0x00, 0x2a }, { 1, 0x00, 0x1c } },
+  { { 0, 0x06, 0x1f }, { 1, 0x00, 0x1c } },
+  { { 0, 0x00, 0x2b }, { 1, 0x00, 0x1d } },
+  { { 0, 0x00, 0x2c }, { 1, 0x00, 0x1d } },
+  { { 0, 0x00, 0x2d }, { 1, 0x00, 0x1e } },
+  { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1e } },
+  { { 0, 0x00, 0x2e }, { 1, 0x00, 0x1f } },
+  { { 0, 0x00, 0x2f }, { 1, 0x00, 0x1f } },
+  { { 0, 0x01, 0x2e }, { 1, 0x01, 0x1f } },
+  { { 0, 0x00, 0x30 }, { 1, 0x00, 0x20 } },
+  { { 0, 0x00, 0x31 }, { 1, 0x02, 0x1f } },
+  { { 0, 0x00, 0x32 }, { 1, 0x00, 0x21 } },
+  { { 0, 0x02, 0x2f }, { 1, 0x03, 0x1f } },
+  { { 0, 0x00, 0x33 }, { 1, 0x00, 0x22 } },
+  { { 0, 0x00, 0x34 }, { 1, 0x04, 0x1f } },
+  { { 0, 0x00, 0x35 }, { 1, 0x00, 0x23 } },
+  { { 0, 0x04, 0x2e }, { 1, 0x05, 0x1f } },
+  { { 0, 0x00, 0x36 }, { 1, 0x00, 0x24 } },
+  { { 0, 0x00, 0x37 }, { 1, 0x06, 0x1f } },
+  { { 0, 0x00, 0x38 }, { 1, 0x00, 0x25 } },
+  { { 0, 0x05, 0x2f }, { 1, 0x07, 0x1f } },
+  { { 0, 0x00, 0x39 }, { 1, 0x00, 0x26 } },
+  { { 0, 0x00, 0x3a }, { 1, 0x08, 0x1f } },
+  { { 0, 0x00, 0x3b }, { 1, 0x00, 0x27 } },
+  { { 0, 0x07, 0x2e }, { 1, 0x09, 0x1f } },
+  { { 0, 0x00, 0x3c }, { 1, 0x00, 0x28 } },
+  { { 0, 0x00, 0x3d }, { 1, 0x0a, 0x1f } },
+  { { 0, 0x00, 0x3e }, { 1, 0x00, 0x29 } },
+  { { 0, 0x08, 0x2f }, { 1, 0x0b, 0x1f } },
+  { { 0, 0x00, 0x3f }, { 1, 0x00, 0x2a } },
+  { { 0, 0x01, 0x3e }, { 1, 0x0c, 0x1f } },
+  { { 0, 0x01, 0x3f }, { 1, 0x00, 0x2b } },
+  { { 0, 0x0a, 0x2e }, { 1, 0x0d, 0x1f } },
+  { { 0, 0x02, 0x3e }, { 1, 0x00, 0x2c } },
+  { { 0, 0x02, 0x3f }, { 1, 0x0e, 0x1f } },
+  { { 0, 0x03, 0x3e }, { 1, 0x00, 0x2d } },
+  { { 0, 0x0b, 0x2f }, { 1, 0x0f, 0x1f } },
+  { { 0, 0x03, 0x3f }, { 1, 0x00, 0x2e } },
+  { { 0, 0x04, 0x3e }, { 1, 0x00, 0x2e } },
+  { { 0, 0x04, 0x3f }, { 1, 0x00, 0x2f } },
+  { { 0, 0x0d, 0x2e }, { 1, 0x00, 0x2f } },
+  { { 0, 0x05, 0x3e }, { 1, 0x00, 0x30 } },
+  { { 0, 0x05, 0x3f }, { 1, 0x00, 0x30 } },
+  { { 0, 0x06, 0x3e }, { 1, 0x00, 0x31 } },
+  { { 0, 0x0e, 0x2f }, { 1, 0x00, 0x31 } },
+  { { 0, 0x06, 0x3f }, { 1, 0x00, 0x32 } },
+  { { 0, 0x07, 0x3e }, { 1, 0x00, 0x32 } },
+  { { 0, 0x07, 0x3f }, { 1, 0x00, 0x33 } },
+  { { 0, 0x10, 0x2d }, { 1, 0x00, 0x33 } },
+  { { 0, 0x08, 0x3e }, { 1, 0x00, 0x34 } },
+  { { 0, 0x08, 0x3f }, { 1, 0x00, 0x34 } },
+  { { 0, 0x09, 0x3e }, { 1, 0x00, 0x35 } },
+  { { 0, 0x10, 0x30 }, { 1, 0x00, 0x35 } },
+  { { 0, 0x09, 0x3f }, { 1, 0x00, 0x36 } },
+  { { 0, 0x0a, 0x3e }, { 1, 0x00, 0x36 } },
+  { { 0, 0x0a, 0x3f }, { 1, 0x00, 0x37 } },
+  { { 0, 0x10, 0x33 }, { 1, 0x00, 0x37 } },
+  { { 0, 0x0b, 0x3e }, { 1, 0x00, 0x38 } },
+  { { 0, 0x0b, 0x3f }, { 1, 0x00, 0x38 } },
+  { { 0, 0x0c, 0x3e }, { 1, 0x00, 0x39 } },
+  { { 0, 0x10, 0x36 }, { 1, 0x00, 0x39 } },
+  { { 0, 0x0c, 0x3f }, { 1, 0x00, 0x3a } },
+  { { 0, 0x0d, 0x3e }, { 1, 0x00, 0x3a } },
+  { { 0, 0x0d, 0x3f }, { 1, 0x00, 0x3b } },
+  { { 0, 0x10, 0x39 }, { 1, 0x00, 0x3b } },
+  { { 0, 0x0e, 0x3e }, { 1, 0x00, 0x3c } },
+  { { 0, 0x0e, 0x3f }, { 1, 0x00, 0x3c } },
+  { { 0, 0x0f, 0x3e }, { 1, 0x00, 0x3d } },
+  { { 0, 0x10, 0x3c }, { 1, 0x00, 0x3d } },
+  { { 0, 0x0f, 0x3f }, { 1, 0x00, 0x3e } },
+  { { 0, 0x18, 0x2e }, { 1, 0x00, 0x3e } },
+  { { 0, 0x10, 0x3e }, { 1, 0x00, 0x3f } },
+  { { 0, 0x10, 0x3f }, { 1, 0x00, 0x3f } },
+  { { 0, 0x11, 0x3e }, { 1, 0x01, 0x3f } },
+  { { 0, 0x19, 0x2f }, { 1, 0x10, 0x30 } },
+  { { 0, 0x11, 0x3f }, { 1, 0x02, 0x3f } },
+  { { 0, 0x12, 0x3e }, { 1, 0x10, 0x31 } },
+  { { 0, 0x12, 0x3f }, { 1, 0x03, 0x3f } },
+  { { 0, 0x1b, 0x2e }, { 1, 0x10, 0x32 } },
+  { { 0, 0x13, 0x3e }, { 1, 0x04, 0x3f } },
+  { { 0, 0x13, 0x3f }, { 1, 0x10, 0x33 } },
+  { { 0, 0x14, 0x3e }, { 1, 0x05, 0x3f } },
+  { { 0, 0x1c, 0x2f }, { 1, 0x10, 0x34 } },
+  { { 0, 0x14, 0x3f }, { 1, 0x06, 0x3f } },
+  { { 0, 0x15, 0x3e }, { 1, 0x10, 0x35 } },
+  { { 0, 0x15, 0x3f }, { 1, 0x07, 0x3f } },
+  { { 0, 0x1e, 0x2e }, { 1, 0x10, 0x36 } },
+  { { 0, 0x16, 0x3e }, { 1, 0x08, 0x3f } },
+  { { 0, 0x16, 0x3f }, { 1, 0x10, 0x37 } },
+  { { 0, 0x17, 0x3e }, { 1, 0x09, 0x3f } },
+  { { 0, 0x1f, 0x2f }, { 1, 0x10, 0x38 } },
+  { { 0, 0x17, 0x3f }, { 1, 0x0a, 0x3f } },
+  { { 0, 0x18, 0x3e }, { 1, 0x10, 0x39 } },
+  { { 0, 0x18, 0x3f }, { 1, 0x0b, 0x3f } },
+  { { 0, 0x20, 0x2f }, { 1, 0x10, 0x3a } },
+  { { 0, 0x19, 0x3e }, { 1, 0x0c, 0x3f } },
+  { { 0, 0x19, 0x3f }, { 1, 0x10, 0x3b } },
+  { { 0, 0x1a, 0x3e }, { 1, 0x0d, 0x3f } },
+  { { 0, 0x20, 0x32 }, { 1, 0x10, 0x3c } },
+  { { 0, 0x1a, 0x3f }, { 1, 0x0e, 0x3f } },
+  { { 0, 0x1b, 0x3e }, { 1, 0x10, 0x3d } },
+  { { 0, 0x1b, 0x3f }, { 1, 0x0f, 0x3f } },
+  { { 0, 0x20, 0x35 }, { 1, 0x10, 0x3e } },
+  { { 0, 0x1c, 0x3e }, { 1, 0x10, 0x3e } },
+  { { 0, 0x1c, 0x3f }, { 1, 0x10, 0x3f } },
+  { { 0, 0x1d, 0x3e }, { 1, 0x10, 0x3f } },
+  { { 0, 0x20, 0x38 }, { 1, 0x11, 0x3f } },
+  { { 0, 0x1d, 0x3f }, { 1, 0x11, 0x3f } },
+  { { 0, 0x1e, 0x3e }, { 1, 0x12, 0x3f } },
+  { { 0, 0x1e, 0x3f }, { 1, 0x12, 0x3f } },
+  { { 0, 0x20, 0x3b }, { 1, 0x13, 0x3f } },
+  { { 0, 0x1f, 0x3e }, { 1, 0x13, 0x3f } },
+  { { 0, 0x1f, 0x3f }, { 1, 0x14, 0x3f } },
+  { { 0, 0x20, 0x3d }, { 1, 0x14, 0x3f } },
+  { { 0, 0x20, 0x3e }, { 1, 0x15, 0x3f } },
+  { { 0, 0x20, 0x3f }, { 1, 0x15, 0x3f } },
+  { { 0, 0x29, 0x2e }, { 1, 0x16, 0x3f } },
+  { { 0, 0x21, 0x3e }, { 1, 0x16, 0x3f } },
+  { { 0, 0x21, 0x3f }, { 1, 0x17, 0x3f } },
+  { { 0, 0x22, 0x3e }, { 1, 0x17, 0x3f } },
+  { { 0, 0x2a, 0x2f }, { 1, 0x18, 0x3f } },
+  { { 0, 0x22, 0x3f }, { 1, 0x18, 0x3f } },
+  { { 0, 0x23, 0x3e }, { 1, 0x19, 0x3f } },
+  { { 0, 0x23, 0x3f }, { 1, 0x19, 0x3f } },
+  { { 0, 0x2c, 0x2e }, { 1, 0x1a, 0x3f } },
+  { { 0, 0x24, 0x3e }, { 1, 0x1a, 0x3f } },
+  { { 0, 0x24, 0x3f }, { 1, 0x1b, 0x3f } },
+  { { 0, 0x25, 0x3e }, { 1, 0x1b, 0x3f } },
+  { { 0, 0x2d, 0x2f }, { 1, 0x1c, 0x3f } },
+  { { 0, 0x25, 0x3f }, { 1, 0x1c, 0x3f } },
+  { { 0, 0x26, 0x3e }, { 1, 0x1d, 0x3f } },
+  { { 0, 0x26, 0x3f }, { 1, 0x1d, 0x3f } },
+  { { 1, 0x1e, 0x3f }, { 0, 0x26, 0x3f } },
+  { { 0, 0x27, 0x3e }, { 1, 0x1e, 0x3f } },
+  { { 0, 0x27, 0x3f }, { 1, 0x1f, 0x3f } },
+  { { 0, 0x28, 0x3e }, { 1, 0x1f, 0x3f } },
+  { { 1, 0x20, 0x3f }, { 0, 0x28, 0x3e } },
+  { { 0, 0x28, 0x3f }, { 1, 0x20, 0x3f } },
+  { { 0, 0x29, 0x3e }, { 1, 0x21, 0x3f } },
+  { { 0, 0x29, 0x3f }, { 1, 0x30, 0x30 } },
+  { { 0, 0x30, 0x31 }, { 1, 0x22, 0x3f } },
+  { { 0, 0x2a, 0x3e }, { 1, 0x30, 0x31 } },
+  { { 0, 0x2a, 0x3f }, { 1, 0x23, 0x3f } },
+  { { 0, 0x2b, 0x3e }, { 1, 0x30, 0x32 } },
+  { { 0, 0x30, 0x34 }, { 1, 0x24, 0x3f } },
+  { { 0, 0x2b, 0x3f }, { 1, 0x30, 0x33 } },
+  { { 0, 0x2c, 0x3e }, { 1, 0x25, 0x3f } },
+  { { 0, 0x2c, 0x3f }, { 1, 0x30, 0x34 } },
+  { { 0, 0x30, 0x37 }, { 1, 0x26, 0x3f } },
+  { { 0, 0x2d, 0x3e }, { 1, 0x30, 0x35 } },
+  { { 0, 0x2d, 0x3f }, { 1, 0x27, 0x3f } },
+  { { 0, 0x2e, 0x3e }, { 1, 0x30, 0x36 } },
+  { { 0, 0x30, 0x3a }, { 1, 0x28, 0x3f } },
+  { { 0, 0x2e, 0x3f }, { 1, 0x30, 0x37 } },
+  { { 0, 0x2f, 0x3e }, { 1, 0x29, 0x3f } },
+  { { 0, 0x2f, 0x3f }, { 1, 0x30, 0x38 } },
+  { { 0, 0x30, 0x3d }, { 1, 0x2a, 0x3f } },
+  { { 0, 0x30, 0x3e }, { 1, 0x30, 0x39 } },
+  { { 1, 0x2b, 0x3f }, { 0, 0x30, 0x3e } },
+  { { 0, 0x30, 0x3f }, { 1, 0x30, 0x3a } },
+  { { 0, 0x31, 0x3e }, { 1, 0x2c, 0x3f } },
+  { { 0, 0x31, 0x3f }, { 1, 0x30, 0x3b } },
+  { { 1, 0x2d, 0x3f }, { 0, 0x31, 0x3f } },
+  { { 0, 0x32, 0x3e }, { 1, 0x30, 0x3c } },
+  { { 0, 0x32, 0x3f }, { 1, 0x2e, 0x3f } },
+  { { 0, 0x33, 0x3e }, { 1, 0x30, 0x3d } },
+  { { 1, 0x2f, 0x3f }, { 0, 0x33, 0x3e } },
+  { { 0, 0x33, 0x3f }, { 1, 0x30, 0x3e } },
+  { { 0, 0x34, 0x3e }, { 1, 0x30, 0x3e } },
+  { { 0, 0x34, 0x3f }, { 1, 0x30, 0x3f } },
+  { { 0, 0x34, 0x3f }, { 1, 0x30, 0x3f } },
+  { { 0, 0x35, 0x3e }, { 1, 0x31, 0x3f } },
+  { { 0, 0x35, 0x3f }, { 1, 0x31, 0x3f } },
+  { { 0, 0x36, 0x3e }, { 1, 0x32, 0x3f } },
+  { { 0, 0x36, 0x3e }, { 1, 0x32, 0x3f } },
+  { { 0, 0x36, 0x3f }, { 1, 0x33, 0x3f } },
+  { { 0, 0x37, 0x3e }, { 1, 0x33, 0x3f } },
+  { { 0, 0x37, 0x3f }, { 1, 0x34, 0x3f } },
+  { { 0, 0x37, 0x3f }, { 1, 0x34, 0x3f } },
+  { { 0, 0x38, 0x3e }, { 1, 0x35, 0x3f } },
+  { { 0, 0x38, 0x3f }, { 1, 0x35, 0x3f } },
+  { { 0, 0x39, 0x3e }, { 1, 0x36, 0x3f } },
+  { { 0, 0x39, 0x3e }, { 1, 0x36, 0x3f } },
+  { { 0, 0x39, 0x3f }, { 1, 0x37, 0x3f } },
+  { { 0, 0x3a, 0x3e }, { 1, 0x37, 0x3f } },
+  { { 0, 0x3a, 0x3f }, { 1, 0x38, 0x3f } },
+  { { 0, 0x3a, 0x3f }, { 1, 0x38, 0x3f } },
+  { { 0, 0x3b, 0x3e }, { 1, 0x39, 0x3f } },
+  { { 0, 0x3b, 0x3f }, { 1, 0x39, 0x3f } },
+  { { 0, 0x3c, 0x3e }, { 1, 0x3a, 0x3f } },
+  { { 0, 0x3c, 0x3e }, { 1, 0x3a, 0x3f } },
+  { { 0, 0x3c, 0x3f }, { 1, 0x3b, 0x3f } },
+  { { 0, 0x3d, 0x3e }, { 1, 0x3b, 0x3f } },
+  { { 0, 0x3d, 0x3f }, { 1, 0x3c, 0x3f } },
+  { { 0, 0x3d, 0x3f }, { 1, 0x3c, 0x3f } },
+  { { 0, 0x3e, 0x3e }, { 1, 0x3d, 0x3f } },
+  { { 0, 0x3e, 0x3f }, { 1, 0x3d, 0x3f } },
+  { { 1, 0x3e, 0x3f }, { 0, 0x3e, 0x3f } },
+  { { 0, 0x3f, 0x3f }, { 1, 0x3e, 0x3f } },
+  { { 0, 0x3f, 0x3f }, { 1, 0x3f, 0x3f } }
 };
diff --git a/BPTCEncoder/src/BitStream.h b/BPTCEncoder/src/BitStream.h
index d502ef3..3f30d6d 100755
--- a/BPTCEncoder/src/BitStream.h
+++ b/BPTCEncoder/src/BitStream.h
@@ -77,7 +77,7 @@ class BitStream {
   { }
 
   int GetBitsWritten() const { return m_BitsWritten; }
-	  
+
   ~BitStream() { }
   void WriteBitsR(unsigned int val, unsigned int nBits) {
     for(unsigned int i = 0; i < nBits; i++) {
diff --git a/BPTCEncoder/src/RGBAEndpoints.cpp b/BPTCEncoder/src/RGBAEndpoints.cpp
index cc9ba6e..8b2d5ef 100755
--- a/BPTCEncoder/src/RGBAEndpoints.cpp
+++ b/BPTCEncoder/src/RGBAEndpoints.cpp
@@ -89,22 +89,22 @@ static T max(const T &a, const T &b) {
 
 static const double kPi = 3.141592653589793238462643383279502884197;
 static const float kFloatConversion[256] = {
-	0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 
-	16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 
-	32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 
-	48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 
-	64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 
-	80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 
-	96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, 
-	112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f, 
-	128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f, 
-	144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f, 
-	160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f, 
-	176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f, 
-	192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f, 
-	208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f, 
-	224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f, 
-	240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f
+  0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 
+  16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 
+  32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 
+  48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 
+  64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 
+  80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 
+  96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, 
+  112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f, 
+  128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f, 
+  144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f, 
+  160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f, 
+  176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f, 
+  192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f, 
+  208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f, 
+  224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f, 
+  240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f
 };
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -115,41 +115,41 @@ static const float kFloatConversion[256] = {
 static inline uint32 CountBitsInMask(uint8 n) {
 
 #if defined(_WIN64) || defined(__x86_64__) || defined(NO_INLINE_ASSEMBLY)
-	if(!n) return 0; // no bits set
-	if(!(n & (n-1))) return 1; // power of two
+  if(!n) return 0; // no bits set
+  if(!(n & (n-1))) return 1; // power of two
 
-	uint32 c;
-	for(c = 0; n; c++) {
-		n &= n - 1;
-	}
-	return c;
+  uint32 c;
+  for(c = 0; n; c++) {
+    n &= n - 1;
+  }
+  return c;
 #else
 #ifdef _MSC_VER
-	__asm {
-		mov eax, 8
-		movzx ecx, n
-		bsf ecx, ecx
-		sub eax, ecx
+  __asm {
+    mov eax, 8
+    movzx ecx, n
+    bsf ecx, ecx
+    sub eax, ecx
         }
 #else
-	uint32 ans;
-	__asm__("movl $8, %%eax;"
-		"movzbl %b1, %%ecx;"
-		"bsf %%ecx, %%ecx;"
-		"subl %%ecx, %%eax;"
-		"movl %%eax, %0;"
-		: "=Q"(ans)
-		: "b"(n)
-		: "%eax", "%ecx"
-	);
-	return ans;
-#endif	
+  uint32 ans;
+  __asm__("movl $8, %%eax;"
+    "movzbl %b1, %%ecx;"
+    "bsf %%ecx, %%ecx;"
+    "subl %%ecx, %%eax;"
+    "movl %%eax, %0;"
+    : "=Q"(ans)
+    : "b"(n)
+    : "%eax", "%ecx"
+  );
+  return ans;
+#endif  
 #endif
 }
 
 template <typename ty>
 static inline void clamp(ty &x, const ty &min, const ty &max) {
-	x = (x < min)? min : ((x > max)? max : x);
+  x = (x < min)? min : ((x > max)? max : x);
 }
 
 // absolute distance. It turns out the compiler does a much
@@ -157,23 +157,23 @@ static inline void clamp(ty &x, const ty &min, const ty &max) {
 // translate the values to/from registers
 static uint8 sad(uint8 a, uint8 b) {
 #if 0
-	__asm
-	{
-		movzx eax, a
-		movzx ecx, b
-		sub eax, ecx
-		jns done
-		neg eax
+  __asm
+  {
+    movzx eax, a
+    movzx ecx, b
+    sub eax, ecx
+    jns done
+    neg eax
 done:
-	}
+  }
 #else
-	//const INT d = a - b;
-	//const INT mask = d >> 31;
-	//return (d ^ mask) - mask;
+  //const INT d = a - b;
+  //const INT mask = d >> 31;
+  //return (d ^ mask) - mask;
 
-	// return abs(a - b);
+  // return abs(a - b);
 
-	return (a > b)? a - b : b - a;
+  return (a > b)? a - b : b - a;
 
 #endif
 }
@@ -186,55 +186,55 @@ done:
 
 uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit) {
 
-	// If the mask is all the bits, then we can just return the value.
-	if(mask == 0xFF) {
+  // If the mask is all the bits, then we can just return the value.
+  if(mask == 0xFF) {
           return val;
-	}
+  }
 
-        // Otherwise if the mask is no bits then we'll assume that they want
-        // all the bits ... this is only really relevant for alpha...
-        if(mask == 0x0) {
-          return 0xFF;
-        }
+  // Otherwise if the mask is no bits then we'll assume that they want
+  // all the bits ... this is only really relevant for alpha...
+  if(mask == 0x0) {
+    return 0xFF;
+  }
 
-	uint32 prec = CountBitsInMask(mask);
-	const uint32 step = 1 << (8 - prec);
+  uint32 prec = CountBitsInMask(mask);
+  const uint32 step = 1 << (8 - prec);
 
-	assert(step-1 == uint8(~mask));
+  assert(step-1 == uint8(~mask));
 
-	uint32 lval = val & mask;
-	uint32 hval = lval + step;
+  uint32 lval = val & mask;
+  uint32 hval = lval + step;
 
-	if(pBit >= 0) {
-		prec++;
-		lval |= !!(pBit) << (8 - prec);
-		hval |= !!(pBit) << (8 - prec);
-	}
+  if(pBit >= 0) {
+    prec++;
+    lval |= !!(pBit) << (8 - prec);
+    hval |= !!(pBit) << (8 - prec);
+  }
 
-	if(lval > val) {
-		lval -= step;
-		hval -= step;
-	}
+  if(lval > val) {
+    lval -= step;
+    hval -= step;
+  }
 
-	lval |= lval >> prec;
-	hval |= hval >> prec;
+  lval |= lval >> prec;
+  hval |= hval >> prec;
 
-	if(sad(val, lval) < sad(val, hval))
-		return lval;
-	else
-		return hval;
+  if(sad(val, lval) < sad(val, hval))
+    return lval;
+  else
+    return hval;
 }
 
 uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
 
-	const uint8 pRet0 = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit);
-	const uint8 pRet1 = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit);
-	const uint8 pRet2 = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit);
-	const uint8 pRet3 = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit);
+  const uint8 pRet0 = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit);
+  const uint8 pRet1 = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit);
+  const uint8 pRet2 = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit);
+  const uint8 pRet3 = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit);
 
-	const uint32 ret = pRet0 | (pRet1 << 8) | (pRet2 << 16) | (pRet3 << 24);
+  const uint32 ret = pRet0 | (pRet1 << 8) | (pRet2 << 16) | (pRet3 << 24);
 
-	return ret;
+  return ret;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -244,85 +244,85 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
 ///////////////////////////////////////////////////////////////////////////////
 
 RGBAMatrix &RGBAMatrix::operator *=(const RGBAMatrix &mat) {
-	*this = ((*this) * mat);
-	return (*this);
+  *this = ((*this) * mat);
+  return (*this);
 }
 
 RGBAMatrix RGBAMatrix::operator *(const RGBAMatrix &mat) const {
 
-	RGBAMatrix result;
+  RGBAMatrix result;
 
-	for(int i = 0; i < 4; i++) {
-		for(int j = 0; j < 4; j++) {
+  for(int i = 0; i < 4; i++) {
+    for(int j = 0; j < 4; j++) {
 
-			result(i, j) = 0.0f;
-			for(int k = 0; k < 4; k++) {
-				result(i, j) += m[i*4 + k] * mat.m[k*4 + j];
-			}
-		}
-	}
+      result(i, j) = 0.0f;
+      for(int k = 0; k < 4; k++) {
+        result(i, j) += m[i*4 + k] * mat.m[k*4 + j];
+      }
+    }
+  }
 
-	return result;
+  return result;
 }
 
 RGBAVector RGBAMatrix::operator *(const RGBAVector &p) const {
-	return RGBAVector (
-		p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4,
-		p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8,
-		p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12,
-		p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16
-	);
+  return RGBAVector (
+    p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4,
+    p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8,
+    p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12,
+    p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16
+  );
 }
 
 RGBAMatrix RGBAMatrix::RotateX(float rad) {
-	RGBAMatrix result;
-	result.m6 = result.m11 = cos(rad);
-	result.m10 = sin(rad);
-	result.m7 = -result.m10;
-	return result;
+  RGBAMatrix result;
+  result.m6 = result.m11 = cos(rad);
+  result.m10 = sin(rad);
+  result.m7 = -result.m10;
+  return result;
 }
 
 RGBAMatrix RGBAMatrix::RotateY(float rad) {
-	RGBAMatrix result;
-	result.m1 = result.m11 = cos(rad);
-	result.m3 = sin(rad);
-	result.m9 = -result.m3;
-	return result;
+  RGBAMatrix result;
+  result.m1 = result.m11 = cos(rad);
+  result.m3 = sin(rad);
+  result.m9 = -result.m3;
+  return result;
 }
 
 RGBAMatrix RGBAMatrix::RotateZ(float rad) {
-	RGBAMatrix result;
-	result.m1 = result.m6 = cos(rad);
-	result.m5 = sin(rad);
-	result.m2 = -result.m5;
-	return result;
+  RGBAMatrix result;
+  result.m1 = result.m6 = cos(rad);
+  result.m5 = sin(rad);
+  result.m2 = -result.m5;
+  return result;
 }
 
 RGBAMatrix RGBAMatrix::Translate(const RGBAVector &t) {
-	RGBAMatrix result;
-	result.m4 = t.x;
-	result.m8 = t.y;
-	result.m12 = t.z;
-	result.m16 = t.w;
-	return result;
+  RGBAMatrix result;
+  result.m4 = t.x;
+  result.m8 = t.y;
+  result.m12 = t.z;
+  result.m16 = t.w;
+  return result;
 }
 
 bool RGBAMatrix::Identity() {
-	for(int i = 0; i < 4; i++) {
-		for(int j = 0; j < 4; j++) {
+  for(int i = 0; i < 4; i++) {
+    for(int j = 0; j < 4; j++) {
 
-			if(i == j) {
-				if(fabs(m[i*4 + j] - 1.0f) > 1e-5)
-					return false;
-			}
-			else {
-				if(fabs(m[i*4 + j]) > 1e-5)
-					return false;
-			}
-		}
-	}
+      if(i == j) {
+        if(fabs(m[i*4 + j] - 1.0f) > 1e-5)
+          return false;
+      }
+      else {
+        if(fabs(m[i*4 + j]) > 1e-5)
+          return false;
+      }
+    }
+  }
 
-	return true;
+  return true;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -332,45 +332,45 @@ bool RGBAMatrix::Identity() {
 ///////////////////////////////////////////////////////////////////////////////
 
 RGBACluster::RGBACluster(const RGBACluster &left, const RGBACluster &right) {
-	*this = left;
-	for(uint32 i = 0; i < right.m_NumPoints; i++) {
-		const RGBAVector &p = right.m_DataPoints[i];
-		AddPoint(p);
-	}
+  *this = left;
+  for(uint32 i = 0; i < right.m_NumPoints; i++) {
+    const RGBAVector &p = right.m_DataPoints[i];
+    AddPoint(p);
+  }
 
-	m_PrincipalAxisCached = false;
-}	
+  m_PrincipalAxisCached = false;
+}  
 
 void RGBACluster::AddPoint(const RGBAVector &p) {
-	assert(m_NumPoints < kMaxNumDataPoints);
-	m_Total += p;
-	m_DataPoints[m_NumPoints++] = p;
-	m_PointBitString |= 1 << p.GetIdx();
+  assert(m_NumPoints < kMaxNumDataPoints);
+  m_Total += p;
+  m_DataPoints[m_NumPoints++] = p;
+  m_PointBitString |= 1 << p.GetIdx();
 
-	for(uint32 i = 0; i < kNumColorChannels; i++) {
-		m_Min.c[i] = min(p.c[i], m_Min.c[i]);
-		m_Max.c[i] = max(p.c[i], m_Max.c[i]);
-	}
+  for(uint32 i = 0; i < kNumColorChannels; i++) {
+    m_Min.c[i] = min(p.c[i], m_Min.c[i]);
+    m_Max.c[i] = max(p.c[i], m_Max.c[i]);
+  }
 }
 
 void RGBACluster::GetPrincipalAxis(RGBADir &axis) {
 
-	if(m_PrincipalAxisCached) {
-		axis = m_PrincipalAxis;
-		return;
-	}
+  if(m_PrincipalAxisCached) {
+    axis = m_PrincipalAxis;
+    return;
+  }
 
-	m_PowerMethodIterations = ::GetPrincipalAxis(
-	    m_NumPoints, 
-	    m_DataPoints, 
-	    m_PrincipalAxis, 
-	    m_PrincipalEigenvalue, 
-	    &m_SecondEigenvalue
-	);
+  m_PowerMethodIterations = ::GetPrincipalAxis(
+    m_NumPoints, 
+    m_DataPoints, 
+    m_PrincipalAxis, 
+    m_PrincipalEigenvalue, 
+    &m_SecondEigenvalue
+  );
 
-	m_PrincipalAxisCached = true;
+  m_PrincipalAxisCached = true;
 
-	GetPrincipalAxis(axis);
+  GetPrincipalAxis(axis);
 }
 
 double RGBACluster::GetPrincipalEigenvalue() {
@@ -408,74 +408,74 @@ uint32 RGBACluster::GetPowerMethodIterations() {
 
 double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2], int *indices) const {
 
-	// nBuckets should be a power of two.
-	assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
+  // nBuckets should be 3 or a power of two.
+  assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
 
-	const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1));
-	
-	typedef uint32 tInterpPair[2];
-	typedef tInterpPair tInterpLevel[16];
-	const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1);
+  const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1));
+  
+  typedef uint32 tInterpPair[2];
+  typedef tInterpPair tInterpLevel[16];
+  const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1);
 
-	assert(indexPrec >= 2 && indexPrec <= 4);
+  assert(indexPrec >= 2 && indexPrec <= 4);
 
-	uint32 qp1, qp2;
-	if(pbits) {
-		qp1 = p1.ToPixel(bitMask, pbits[0]);
-		qp2 = p2.ToPixel(bitMask, pbits[1]);
-	}
-	else {
-		qp1 = p1.ToPixel(bitMask);
-		qp2 = p2.ToPixel(bitMask);
-	}
+  uint32 qp1, qp2;
+  if(pbits) {
+    qp1 = p1.ToPixel(bitMask, pbits[0]);
+    qp2 = p2.ToPixel(bitMask, pbits[1]);
+  }
+  else {
+    qp1 = p1.ToPixel(bitMask);
+    qp2 = p2.ToPixel(bitMask);
+  }
 
-	uint8 *pqp1 = (uint8 *)&qp1;
-	uint8 *pqp2 = (uint8 *)&qp2;
+  uint8 *pqp1 = (uint8 *)&qp1;
+  uint8 *pqp2 = (uint8 *)&qp2;
 
-	const RGBAVector metric = errorMetricVec;
+  const RGBAVector metric = errorMetricVec;
 
-	float totalError = 0.0;
-	for(uint32 i = 0; i < m_NumPoints; i++) {
+  float totalError = 0.0;
+  for(uint32 i = 0; i < m_NumPoints; i++) {
 
-		const uint32 pixel = m_DataPoints[i].ToPixel();
-		const uint8 *pb = (const uint8 *)(&pixel);
+    const uint32 pixel = m_DataPoints[i].ToPixel();
+    const uint8 *pb = (const uint8 *)(&pixel);
 
-		float minError = FLT_MAX;
-		int bestBucket = -1;
-		for(int j = 0; j < nBuckets; j++) {
+    float minError = FLT_MAX;
+    int bestBucket = -1;
+    for(int j = 0; j < nBuckets; j++) {
 
-			uint32 interp0 = (*interpVals)[j][0];
-			uint32 interp1 = (*interpVals)[j][1];
+      uint32 interp0 = (*interpVals)[j][0];
+      uint32 interp1 = (*interpVals)[j][1];
 
-			RGBAVector errorVec (0.0f);
-			for(uint32 k = 0; k < kNumColorChannels; k++) {
-				const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF;
-				const uint8 dist = sad(pb[k], ip);
-				errorVec.c[k] = kFloatConversion[dist] * metric.c[k];
-			}
-			
-			float error = errorVec * errorVec;
-			if(error < minError) {
-				minError = error;
-				bestBucket = j;
-			}
+      RGBAVector errorVec (0.0f);
+      for(uint32 k = 0; k < kNumColorChannels; k++) {
+        const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF;
+        const uint8 dist = sad(pb[k], ip);
+        errorVec.c[k] = kFloatConversion[dist] * metric.c[k];
+      }
+      
+      float error = errorVec * errorVec;
+      if(error < minError) {
+        minError = error;
+        bestBucket = j;
+      }
 
-			// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
-			// farther away from the reference point along the line). Hence we can early out here.
-			// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
-			// about 0.01 RMS error. 
-			else if(error > minError) {
-				break;
-			}
-		}
+      // Conceptually, once the error starts growing, it doesn't stop growing (we're moving
+      // farther away from the reference point along the line). Hence we can early out here.
+      // However, quantization artifacts mean that this is not ALWAYS the case, so we do suffer
+      // about 0.01 RMS error. 
+      else if(error > minError) {
+        break;
+      }
+    }
 
-		totalError += minError;
+    totalError += minError;
 
-		assert(bestBucket >= 0);
-		if(indices) indices[i] = bestBucket;
-	}
+    assert(bestBucket >= 0);
+    if(indices) indices[i] = bestBucket;
+  }
 
-	return totalError;
+  return totalError;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -485,175 +485,174 @@ double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, u
 ///////////////////////////////////////////////////////////////////////////////
 
 void ClampEndpoints(RGBAVector &p1, RGBAVector &p2) {
-	clamp(p1.r, 0.0f, 255.0f);
-	clamp(p1.g, 0.0f, 255.0f);
-	clamp(p1.b, 0.0f, 255.0f);
-	clamp(p1.a, 0.0f, 255.0f);
+  clamp(p1.r, 0.0f, 255.0f);
+  clamp(p1.g, 0.0f, 255.0f);
+  clamp(p1.b, 0.0f, 255.0f);
+  clamp(p1.a, 0.0f, 255.0f);
 
-	clamp(p2.r, 0.0f, 255.0f);
-	clamp(p2.g, 0.0f, 255.0f);
-	clamp(p2.b, 0.0f, 255.0f);
-	clamp(p2.a, 0.0f, 255.0f);
+  clamp(p2.r, 0.0f, 255.0f);
+  clamp(p2.g, 0.0f, 255.0f);
+  clamp(p2.b, 0.0f, 255.0f);
+  clamp(p2.a, 0.0f, 255.0f);
 }
 
 static uint32 PowerIteration(const RGBAMatrix &mat, RGBADir &eigVec, double &eigVal) {
 
-	int numIterations = 0;
-	const int kMaxNumIterations = 200;
+  int numIterations = 0;
+  const int kMaxNumIterations = 200;
 
-	for(int nTries = 0; nTries < 3; nTries++) {
-	// !SPEED! Find eigenvectors by using the power method. This is good because the
-	// matrix is only 4x4, which allows us to use SIMD...
+  for(int nTries = 0; nTries < 3; nTries++) {
+  // !SPEED! Find eigenvectors by using the power method. This is good because the
+  // matrix is only 4x4, which allows us to use SIMD...
   RGBAVector b = RGBAVector(float(rand()) + 1.0f);
   b /= b.Length();
 
-	bool fixed = false;
-	numIterations = 0;
-	while(!fixed && ++numIterations < kMaxNumIterations) {
+  bool fixed = false;
+  numIterations = 0;
+  while(!fixed && ++numIterations < kMaxNumIterations) {
 
-		RGBAVector newB = mat * b;
+    RGBAVector newB = mat * b;
 
-		// !HACK! If the principal eigenvector of the covariance matrix
-		// converges to zero, that means that the points lie equally 
-		// spaced on a sphere in this space. In this (extremely rare)
-		// situation, just choose a point and use it as the principal 
-		// direction.
-		const float newBlen = newB.Length();
-		if(newBlen < 1e-10) {
-			eigVec = b;
-			eigVal = 0.0;
-			return numIterations;
-		}
+    // !HACK! If the principal eigenvector of the covariance matrix
+    // converges to zero, that means that the points lie equally 
+    // spaced on a sphere in this space. In this (extremely rare)
+    // situation, just choose a point and use it as the principal 
+    // direction.
+    const float newBlen = newB.Length();
+    if(newBlen < 1e-10) {
+      eigVec = b;
+      eigVal = 0.0;
+      return numIterations;
+    }
 
-		eigVal = newB.Length();
-		newB /= float(eigVal);
+    eigVal = newB.Length();
+    newB /= float(eigVal);
 
-		if(fabs(1.0f - (b * newB)) < 1e-5)
-			fixed = true;
+    if(fabs(1.0f - (b * newB)) < 1e-5)
+      fixed = true;
 
-		b = newB;
-	}
-
-	eigVec = b;  
-	if(numIterations < kMaxNumIterations) {
-	  break;
-	}
+    b = newB;
   }
 
-	if(numIterations == kMaxNumIterations) {
-	  eigVal = 0.0;
-	}
-	return numIterations;
+  eigVec = b;  
+  if(numIterations < kMaxNumIterations) {
+    break;
+  }
+  }
+
+  if(numIterations == kMaxNumIterations) {
+    eigVal = 0.0;
+  }
+  return numIterations;
 }
 
 uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, double &eigOne, double *eigTwo) {
 
-	assert(nPts <= kMaxNumDataPoints);
+  assert(nPts <= kMaxNumDataPoints);
 
-	RGBAVector avg (0.0f);
-	for(uint32 i = 0; i < nPts; i++) {
-		avg += pts[i];
-	}
-	avg /= float(nPts);
+  RGBAVector avg (0.0f);
+  for(uint32 i = 0; i < nPts; i++) {
+    avg += pts[i];
+  }
+  avg /= float(nPts);
 
-	// We use these vectors for calculating the covariance matrix...
-	RGBAVector toPts[kMaxNumDataPoints];
-	RGBAVector toPtsMax(-FLT_MAX);
-	for(uint32 i = 0; i < nPts; i++) {
-		toPts[i] = pts[i] - avg;
+  // We use these vectors for calculating the covariance matrix...
+  RGBAVector toPts[kMaxNumDataPoints];
+  RGBAVector toPtsMax(-FLT_MAX);
+  for(uint32 i = 0; i < nPts; i++) {
+    toPts[i] = pts[i] - avg;
 
-		for(uint32 j = 0; j < kNumColorChannels; j++) {
-			toPtsMax.c[j] = max(toPtsMax.c[j], toPts[i].c[j]);
-		}
-	}
+    for(uint32 j = 0; j < kNumColorChannels; j++) {
+      toPtsMax.c[j] = max(toPtsMax.c[j], toPts[i].c[j]);
+    }
+  }
 
-	// Generate a list of unique points...
-	RGBAVector upts[kMaxNumDataPoints];
-	uint32 uptsIdx = 0;
-	for(uint32 i = 0; i < nPts; i++) {
-		
-		bool hasPt = false;
-		for(uint32 j = 0; j < uptsIdx; j++) {
-			if(upts[j] == pts[i])
-				hasPt = true;
-		}
+  // Generate a list of unique points...
+  RGBAVector upts[kMaxNumDataPoints];
+  uint32 uptsIdx = 0;
+  for(uint32 i = 0; i < nPts; i++) {
+    
+    bool hasPt = false;
+    for(uint32 j = 0; j < uptsIdx; j++) {
+      if(upts[j] == pts[i])
+        hasPt = true;
+    }
 
-		if(!hasPt) {
-			upts[uptsIdx++] = pts[i];
-		}
-	}
+    if(!hasPt) {
+      upts[uptsIdx++] = pts[i];
+    }
+  }
 
-	assert(uptsIdx > 0);
+  assert(uptsIdx > 0);
 
-	if(uptsIdx == 1) {
-		axis.r = axis.g = axis.b = axis.a = 0.0f;
-		return 0;
-	}
-	// Collinear?
-	else {
+  if(uptsIdx == 1) {
+    axis.r = axis.g = axis.b = axis.a = 0.0f;
+    return 0;
 
-		RGBADir dir (upts[1] - upts[0]);
-		bool collinear = true;
-		for(uint32 i = 2; i < nPts; i++) {
-			RGBAVector v = (upts[i] - upts[0]);
-			if(fabs(fabs(v*dir) - v.Length()) > 1e-7) {
-				collinear = false;
-				break;
-			}
-		}
+  // Collinear?
+  } else {
+    RGBADir dir (upts[1] - upts[0]);
+    bool collinear = true;
+    for(uint32 i = 2; i < nPts; i++) {
+      RGBAVector v = (upts[i] - upts[0]);
+      if(fabs(fabs(v*dir) - v.Length()) > 1e-7) {
+        collinear = false;
+        break;
+      }
+    }
 
-		if(collinear) {
-			axis = dir;
-			return 0;
-		}
-	}
+    if(collinear) {
+      axis = dir;
+      return 0;
+    }
+  }
 
-	RGBAMatrix covMatrix;
+  RGBAMatrix covMatrix;
 
-	// Compute covariance.
-	for(uint32 i = 0; i < kNumColorChannels; i++) {
-		for(uint32 j = 0; j <= i; j++) {
+  // Compute covariance.
+  for(uint32 i = 0; i < kNumColorChannels; i++) {
+    for(uint32 j = 0; j <= i; j++) {
 
-			float sum = 0.0;
-			for(uint32 k = 0; k < nPts; k++) {
-				sum += toPts[k].c[i] * toPts[k].c[j];
-			}
+      float sum = 0.0;
+      for(uint32 k = 0; k < nPts; k++) {
+        sum += toPts[k].c[i] * toPts[k].c[j];
+      }
 
-			covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1];
-			covMatrix(j, i) = covMatrix(i, j);
-		}
-	}
-	
-	uint32 iters = PowerIteration(covMatrix, axis, eigOne);
+      covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1];
+      covMatrix(j, i) = covMatrix(i, j);
+    }
+  }
+  
+  uint32 iters = PowerIteration(covMatrix, axis, eigOne);
 
-	if(NULL != eigTwo) {
-	  if(eigOne != 0.0) {
-	    RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix(
-	      axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3], 
-	      axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3],
-	      axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3],
-	      axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3]
-	    );
-	    
-	    bool allZero = true;
-	    for(uint32 i = 0; i < 16; i++) {
-	      if(fabs(reduced[i]) > 0.0005) {
-		allZero = false;
-	      }
-	    }
-
-	    if(allZero) {
-	      *eigTwo = 0.0;
-	    }
-	    else {
-	      RGBADir dummyDir;
-	      iters += PowerIteration(reduced, dummyDir, *eigTwo);
-	    }
-	  }
-	  else {
-	    *eigTwo = 0.0;
-	  }
+  if(NULL != eigTwo) {
+    if(eigOne != 0.0) {
+      RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix(
+        axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3], 
+        axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3],
+        axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3],
+        axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3]
+      );
+      
+      bool allZero = true;
+      for(uint32 i = 0; i < 16; i++) {
+        if(fabs(reduced[i]) > 0.0005) {
+          allZero = false;
         }
+      }
 
-	return iters;
+      if(allZero) {
+        *eigTwo = 0.0;
+      }
+      else {
+        RGBADir dummyDir;
+        iters += PowerIteration(reduced, dummyDir, *eigTwo);
+      }
+    }
+    else {
+      *eigTwo = 0.0;
+    }
+  }
+
+  return iters;
 }
diff --git a/BPTCEncoder/src/RGBAEndpoints.h b/BPTCEncoder/src/RGBAEndpoints.h
index 8617890..418c0d6 100755
--- a/BPTCEncoder/src/RGBAEndpoints.h
+++ b/BPTCEncoder/src/RGBAEndpoints.h
@@ -78,260 +78,260 @@ static const uint32 kMaxNumDataPoints = 16;
 class RGBAVector {
 
 public:
-	union {
-		struct { float r, g, b, a; };
-		struct { float x, y, z, w; };
-		float c[4];
-	};
+  union {
+    struct { float r, g, b, a; };
+    struct { float x, y, z, w; };
+    float c[4];
+  };
 
-	uint32 GetIdx() const { return  idx; }
+  uint32 GetIdx() const { return  idx; }
 
-	RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
-	RGBAVector(uint32 _idx, uint32 pixel) : 
-		r(float(pixel & 0xFF)), 
-		g(float((pixel >> 8) & 0xFF)), 
-		b(float((pixel >> 16) & 0xFF)), 
-		a(float((pixel >> 24) & 0xFF)),
-		idx(_idx)
-	{ }
+  RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
+  RGBAVector(uint32 _idx, uint32 pixel) : 
+    r(float(pixel & 0xFF)), 
+    g(float((pixel >> 8) & 0xFF)), 
+    b(float((pixel >> 16) & 0xFF)), 
+    a(float((pixel >> 24) & 0xFF)),
+    idx(_idx)
+  { }
 
-	RGBAVector(float _r, float _g, float _b, float _a) :
-		r(_r), g(_g), b(_b), a(_a), idx(0) { }
+  RGBAVector(float _r, float _g, float _b, float _a) :
+    r(_r), g(_g), b(_b), a(_a), idx(0) { }
 
-	explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { }
+  explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { }
 
-	RGBAVector &operator =(const RGBAVector &other) {
-		this->idx = other.idx;
-		memcpy(c, other.c, sizeof(c));
-		return (*this);
-	}
+  RGBAVector &operator =(const RGBAVector &other) {
+    this->idx = other.idx;
+    memcpy(c, other.c, sizeof(c));
+    return (*this);
+  }
 
-	RGBAVector operator +(const RGBAVector &p) const {
-		return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a);
-	}
+  RGBAVector operator +(const RGBAVector &p) const {
+    return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a);
+  }
 
-	RGBAVector &operator +=(const RGBAVector &p) {
-		r += p.r; g += p.g; b += p.b; a += p.a;
-		return *this;
-	}
+  RGBAVector &operator +=(const RGBAVector &p) {
+    r += p.r; g += p.g; b += p.b; a += p.a;
+    return *this;
+  }
 
-	RGBAVector operator -(const RGBAVector &p) const {
-		return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a);
-	}
+  RGBAVector operator -(const RGBAVector &p) const {
+    return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a);
+  }
 
-	RGBAVector &operator -=(const RGBAVector &p) {
-		r -= p.r; g -= p.g; b -= p.b; a -= p.a;
-		return *this;
-	}
+  RGBAVector &operator -=(const RGBAVector &p) {
+    r -= p.r; g -= p.g; b -= p.b; a -= p.a;
+    return *this;
+  }
 
-	RGBAVector operator /(const float s) const {
-		return RGBAVector(r / s, g / s, b / s, a / s);
-	}
+  RGBAVector operator /(const float s) const {
+    return RGBAVector(r / s, g / s, b / s, a / s);
+  }
 
-	RGBAVector &operator /=(const float s) {
-		r /= s; g /= s; b /= s; a /= s;
-		return *this;
-	}
+  RGBAVector &operator /=(const float s) {
+    r /= s; g /= s; b /= s; a /= s;
+    return *this;
+  }
 
-	float operator *(const RGBAVector &p) const {
-		return r * p.r + g * p.g + b * p.b + a * p.a;
-	}
+  float operator *(const RGBAVector &p) const {
+    return r * p.r + g * p.g + b * p.b + a * p.a;
+  }
 
-	float Length() const {
-		return sqrt((*this) * (*this));
-	}
+  float Length() const {
+    return sqrt((*this) * (*this));
+  }
 
-	RGBAVector &operator *=(const RGBAVector &v) {
-		r *= v.r; g *= v.g; b *= v.b; a *= v.a;
-		return *this;
-	}
+  RGBAVector &operator *=(const RGBAVector &v) {
+    r *= v.r; g *= v.g; b *= v.b; a *= v.a;
+    return *this;
+  }
 
-	RGBAVector operator *(const float s) const {
-		return RGBAVector(r * s, g * s, b * s, a * s);
-	}
+  RGBAVector operator *(const float s) const {
+    return RGBAVector(r * s, g * s, b * s, a * s);
+  }
 
-	friend RGBAVector operator *(const float s, const RGBAVector &p) {
-		return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s);
-	}
+  friend RGBAVector operator *(const float s, const RGBAVector &p) {
+    return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s);
+  }
 
-	RGBAVector &operator *=(const float s) {
-		r *= s; g *= s; b *= s; a *= s;
-		return *this;
-	}
+  RGBAVector &operator *=(const float s) {
+    r *= s; g *= s; b *= s; a *= s;
+    return *this;
+  }
 
-	float &operator [](const int i) {
-		return c[i];
-	}
+  float &operator [](const int i) {
+    return c[i];
+  }
 
-	friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) {
-		const RGBAVector d = rhs - lhs;
-		return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7;
-	}
+  friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) {
+    const RGBAVector d = rhs - lhs;
+    return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7;
+  }
 
-	friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) {
-		return !(rhs == lhs);
-	}
+  friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) {
+    return !(rhs == lhs);
+  }
 
-	operator float *() {
-		return c;
-	}
+  operator float *() {
+    return c;
+  }
 
-	RGBAVector Cross(const RGBAVector &rhs) {
-		return RGBAVector(
-			rhs.y * z - y * rhs.z,
-			rhs.z * x - z * rhs.x,
-			rhs.x * y - x * rhs.y,
-			1.0f
-		);
-	}
+  RGBAVector Cross(const RGBAVector &rhs) {
+    return RGBAVector(
+      rhs.y * z - y * rhs.z,
+      rhs.z * x - z * rhs.x,
+      rhs.x * y - x * rhs.y,
+      1.0f
+    );
+  }
 
-	// Quantize this point.
-	uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const;
+  // Quantize this point.
+  uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const;
 
 private:
-	uint32 idx;
+  uint32 idx;
 };
 
 class RGBAMatrix {
 private:
-	union {
-		float m[kNumColorChannels*kNumColorChannels];
-		struct {
-			float m1, m2, m3, m4;
-			float m5, m6, m7, m8;
-			float m9, m10, m11, m12;
-			float m13, m14, m15, m16;
-		};
-	};
+  union {
+    float m[kNumColorChannels*kNumColorChannels];
+    struct {
+      float m1, m2, m3, m4;
+      float m5, m6, m7, m8;
+      float m9, m10, m11, m12;
+      float m13, m14, m15, m16;
+    };
+  };
 
-	RGBAMatrix(const float *arr) {
-		memcpy(m, arr, sizeof(m));
-	}
+  RGBAMatrix(const float *arr) {
+    memcpy(m, arr, sizeof(m));
+  }
 
 public:
 
-	RGBAMatrix(
-	  float _m1, float _m2, float _m3, float _m4,
-	  float _m5, float _m6, float _m7, float _m8,
-	  float _m9, float _m10, float _m11, float _m12,
-	  float _m13, float _m14, float _m15, float _m16
-        ) :
-	  m1(_m1), m2(_m2), m3(_m3), m4(_m4),
-	  m5(_m5), m6(_m6), m7(_m7), m8(_m8),
-	  m9(_m9), m10(_m10), m11(_m11), m12(_m12),
-	  m13(_m13), m14(_m14), m15(_m15), m16(_m16)
-	{ }
-	
-	RGBAMatrix() : 
-		m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
-		m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
-		m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
-		m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
-	{ }
+  RGBAMatrix(
+    float _m1, float _m2, float _m3, float _m4,
+    float _m5, float _m6, float _m7, float _m8,
+    float _m9, float _m10, float _m11, float _m12,
+    float _m13, float _m14, float _m15, float _m16
+  ) :
+    m1(_m1), m2(_m2), m3(_m3), m4(_m4),
+    m5(_m5), m6(_m6), m7(_m7), m8(_m8),
+    m9(_m9), m10(_m10), m11(_m11), m12(_m12),
+    m13(_m13), m14(_m14), m15(_m15), m16(_m16)
+  { }
+  
+  RGBAMatrix() : 
+    m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
+    m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
+    m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
+    m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
+  { }
 
-	RGBAMatrix &operator =(const RGBAMatrix &other) {
-		memcpy(m, other.m, sizeof(m));
-		return (*this);
-	}
+  RGBAMatrix &operator =(const RGBAMatrix &other) {
+    memcpy(m, other.m, sizeof(m));
+    return (*this);
+  }
 
-	RGBAMatrix operator +(const RGBAMatrix &p) const {
-		float newm[kNumColorChannels*kNumColorChannels];
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i];
-		return RGBAMatrix(newm);
-	}
+  RGBAMatrix operator +(const RGBAMatrix &p) const {
+    float newm[kNumColorChannels*kNumColorChannels];
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i];
+    return RGBAMatrix(newm);
+  }
 
-	RGBAMatrix &operator +=(const RGBAMatrix &p) {
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i];
-		return *this;
-	}
+  RGBAMatrix &operator +=(const RGBAMatrix &p) {
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i];
+    return *this;
+  }
 
-	RGBAMatrix operator -(const RGBAMatrix &p) const {
-		float newm[kNumColorChannels*kNumColorChannels];
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i];
-		return RGBAMatrix(newm);
-	}
+  RGBAMatrix operator -(const RGBAMatrix &p) const {
+    float newm[kNumColorChannels*kNumColorChannels];
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i];
+    return RGBAMatrix(newm);
+  }
 
-	RGBAMatrix &operator -=(const RGBAMatrix &p) {
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i];
-		return *this;
-	}
+  RGBAMatrix &operator -=(const RGBAMatrix &p) {
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i];
+    return *this;
+  }
 
-	RGBAMatrix operator /(const float s) const {
-		float newm[kNumColorChannels*kNumColorChannels];
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s;
-		return RGBAMatrix(newm);
-	}
+  RGBAMatrix operator /(const float s) const {
+    float newm[kNumColorChannels*kNumColorChannels];
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s;
+    return RGBAMatrix(newm);
+  }
 
-	RGBAMatrix &operator /=(const float s) {
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s;
-		return *this;
-	}
+  RGBAMatrix &operator /=(const float s) {
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s;
+    return *this;
+  }
 
-	RGBAMatrix operator *(const float s) const {
-		float newm[kNumColorChannels*kNumColorChannels];
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s;
-		return RGBAMatrix(newm);
-	}
+  RGBAMatrix operator *(const float s) const {
+    float newm[kNumColorChannels*kNumColorChannels];
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s;
+    return RGBAMatrix(newm);
+  }
 
-	RGBAMatrix operator *(const double s) const {
-		float newm[kNumColorChannels*kNumColorChannels];
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s);
-		return RGBAMatrix(newm);
-	}
+  RGBAMatrix operator *(const double s) const {
+    float newm[kNumColorChannels*kNumColorChannels];
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s);
+    return RGBAMatrix(newm);
+  }
 
-	friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) {
-		float newm[kNumColorChannels*kNumColorChannels];
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s;
-		return RGBAMatrix(newm);	
-	}
+  friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) {
+    float newm[kNumColorChannels*kNumColorChannels];
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s;
+    return RGBAMatrix(newm);  
+  }
 
-	friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) {
-		float newm[kNumColorChannels*kNumColorChannels];
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s);
-		return RGBAMatrix(newm);	
-	}
+  friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) {
+    float newm[kNumColorChannels*kNumColorChannels];
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s);
+    return RGBAMatrix(newm);  
+  }
 
-	RGBAMatrix &operator *=(const float s) {
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s;
-		return *this;
-	}
+  RGBAMatrix &operator *=(const float s) {
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s;
+    return *this;
+  }
 
-	float &operator ()(const int i, const int j) {
-		return (*this)[i*4 + j];
-	}
+  float &operator ()(const int i, const int j) {
+    return (*this)[i*4 + j];
+  }
 
-	float &operator [](const int i) {
-		return m[i];
-	}
+  float &operator [](const int i) {
+    return m[i];
+  }
 
-	friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) {
-		const RGBAMatrix d = rhs - lhs;
-		for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++)
-			if(d.m[i] > 1e-10)
-				return false;
-		return true;
-	}
+  friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) {
+    const RGBAMatrix d = rhs - lhs;
+    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++)
+      if(d.m[i] > 1e-10)
+        return false;
+    return true;
+  }
 
-	operator float *() {
-		return m;
-	}
+  operator float *() {
+    return m;
+  }
 
-	RGBAVector operator *(const RGBAVector &p) const;
-	RGBAMatrix operator *(const RGBAMatrix &mat) const;
-	RGBAMatrix &operator *=(const RGBAMatrix &mat);
-	static RGBAMatrix RotateX(float rad);
-	static RGBAMatrix RotateY(float rad);
-	static RGBAMatrix RotateZ(float rad);
-	static RGBAMatrix Translate(const RGBAVector &t);
-	bool Identity();
+  RGBAVector operator *(const RGBAVector &p) const;
+  RGBAMatrix operator *(const RGBAMatrix &mat) const;
+  RGBAMatrix &operator *=(const RGBAMatrix &mat);
+  static RGBAMatrix RotateX(float rad);
+  static RGBAMatrix RotateY(float rad);
+  static RGBAMatrix RotateZ(float rad);
+  static RGBAMatrix Translate(const RGBAVector &t);
+  bool Identity();
 };
 
 class RGBADir : public RGBAVector {
 public:
-	RGBADir() : RGBAVector() { }
-	RGBADir(const RGBAVector &p) : RGBAVector(p) {
-		*this /= Length();
-	}
+  RGBADir() : RGBAVector() { }
+  RGBADir(const RGBAVector &p) : RGBAVector(p) {
+    *this /= Length();
+  }
 };
 
 // Makes sure that the values of the endpoints lie between 0 and 1.
@@ -340,83 +340,83 @@ extern void ClampEndpoints(RGBAVector &p1, RGBAVector &p2);
 class RGBACluster {
 public:
 
-	RGBACluster() : 
-	  m_NumPoints(0), m_Total(0), 
-	  m_PointBitString(0),
-	  m_Min(FLT_MAX),
-	  m_Max(-FLT_MAX),
-	  m_PrincipalAxisCached(false)
-	{ } 
+  RGBACluster() : 
+    m_NumPoints(0), m_Total(0), 
+    m_PointBitString(0),
+    m_Min(FLT_MAX),
+    m_Max(-FLT_MAX),
+    m_PrincipalAxisCached(false)
+  { } 
 
-	RGBACluster(const RGBACluster &c) : 
-		m_NumPoints(c.m_NumPoints),
-		m_Total(c.m_Total),
-		m_PointBitString(c.m_PointBitString), 
-		m_Min(c.m_Min),
-		m_Max(c.m_Max),
-		m_PrincipalAxisCached(c.m_PrincipalAxisCached),
-		m_PrincipalEigenvalue(c.m_PrincipalEigenvalue),
-		m_SecondEigenvalue(c.m_SecondEigenvalue),
-		m_PowerMethodIterations(c.m_PowerMethodIterations),
-		m_PrincipalAxis(c.m_PrincipalAxis)
-	{ 
-		memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector));
-	}
+  RGBACluster(const RGBACluster &c) : 
+    m_NumPoints(c.m_NumPoints),
+    m_Total(c.m_Total),
+    m_PointBitString(c.m_PointBitString), 
+    m_Min(c.m_Min),
+    m_Max(c.m_Max),
+    m_PrincipalAxisCached(c.m_PrincipalAxisCached),
+    m_PrincipalEigenvalue(c.m_PrincipalEigenvalue),
+    m_SecondEigenvalue(c.m_SecondEigenvalue),
+    m_PowerMethodIterations(c.m_PowerMethodIterations),
+    m_PrincipalAxis(c.m_PrincipalAxis)
+  { 
+    memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector));
+  }
 
-	RGBACluster(const RGBACluster &left, const RGBACluster &right);
-	RGBACluster(const RGBAVector &p) : 
-		m_NumPoints(1),
-		m_Total(p),
-		m_PointBitString(0),
-		m_Min(p), m_Max(p),
-		m_PrincipalAxisCached(false)
-	{ 
-		m_DataPoints[0] = p;
-		m_PointBitString |= (1 << p.GetIdx());
-	}
-			
-	RGBAVector GetTotal() const { return m_Total; }
-	const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; }
-	uint32 GetNumPoints() const { return m_NumPoints; }
-	RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); }
-	const RGBAVector *GetPoints() const { return m_DataPoints; }
+  RGBACluster(const RGBACluster &left, const RGBACluster &right);
+  RGBACluster(const RGBAVector &p) : 
+    m_NumPoints(1),
+    m_Total(p),
+    m_PointBitString(0),
+    m_Min(p), m_Max(p),
+    m_PrincipalAxisCached(false)
+  { 
+    m_DataPoints[0] = p;
+    m_PointBitString |= (1 << p.GetIdx());
+  }
+      
+  RGBAVector GetTotal() const { return m_Total; }
+  const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; }
+  uint32 GetNumPoints() const { return m_NumPoints; }
+  RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); }
+  const RGBAVector *GetPoints() const { return m_DataPoints; }
 
-	void AddPoint(const RGBAVector &p);
+  void AddPoint(const RGBAVector &p);
 
-	void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const {
-		Min = m_Min, Max = m_Max;
-	}
+  void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const {
+    Min = m_Min, Max = m_Max;
+  }
 
-	// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
-	double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const;
+  // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
+  double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const;
 
-	// Returns the principal axis for this point cluster.
-	double GetPrincipalEigenvalue();
-	double GetSecondEigenvalue();
-	uint32 GetPowerMethodIterations();
-	void GetPrincipalAxis(RGBADir &axis);
+  // Returns the principal axis for this point cluster.
+  double GetPrincipalEigenvalue();
+  double GetSecondEigenvalue();
+  uint32 GetPowerMethodIterations();
+  void GetPrincipalAxis(RGBADir &axis);
 
-	bool AllSamePoint() const { return m_Max == m_Min; }
-	int GetPointBitString() const { return m_PointBitString; }
+  bool AllSamePoint() const { return m_Max == m_Min; }
+  int GetPointBitString() const { return m_PointBitString; }
 
 private:
 
-	// The number of points in the cluster.
-	uint32 m_NumPoints;
+  // The number of points in the cluster.
+  uint32 m_NumPoints;
 
-	RGBAVector m_Total;
+  RGBAVector m_Total;
 
-	// The points in the cluster.
-	RGBAVector m_DataPoints[kMaxNumDataPoints];
+  // The points in the cluster.
+  RGBAVector m_DataPoints[kMaxNumDataPoints];
 
-	int m_PointBitString;
-	RGBAVector m_Min, m_Max;
+  int m_PointBitString;
+  RGBAVector m_Min, m_Max;
 
-	bool m_PrincipalAxisCached;
-	double m_PrincipalEigenvalue;
-	double m_SecondEigenvalue;
-	uint32 m_PowerMethodIterations;
-	RGBADir m_PrincipalAxis;
+  bool m_PrincipalAxisCached;
+  double m_PrincipalEigenvalue;
+  double m_SecondEigenvalue;
+  uint32 m_PowerMethodIterations;
+  RGBADir m_PrincipalAxis;
 };
 
 extern uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit = -1);
diff --git a/BPTCEncoder/src/RGBAEndpointsSIMD.cpp b/BPTCEncoder/src/RGBAEndpointsSIMD.cpp
index c187326..bf71188 100755
--- a/BPTCEncoder/src/RGBAEndpointsSIMD.cpp
+++ b/BPTCEncoder/src/RGBAEndpointsSIMD.cpp
@@ -92,37 +92,37 @@ static inline uint32 popcnt32(uint32 x) {
 
 /* Original scalar implementation:
 
-	// If the mask is all the bits, then we can just return the value.
-	if(mask == 0xFF) {
-		return val;
-	}
+  // If the mask is all the bits, then we can just return the value.
+  if(mask == 0xFF) {
+    return val;
+  }
 
-	uint32 prec = CountBitsInMask(mask);
-	const uint32 step = 1 << (8 - prec);
+  uint32 prec = CountBitsInMask(mask);
+  const uint32 step = 1 << (8 - prec);
 
-	assert(step-1 == uint8(~mask));
+  assert(step-1 == uint8(~mask));
 
-	uint32 lval = val & mask;
-	uint32 hval = lval + step;
+  uint32 lval = val & mask;
+  uint32 hval = lval + step;
 
-	if(pBit >= 0) {
-		prec++;
-		lval |= !!(pBit) << (8 - prec);
-		hval |= !!(pBit) << (8 - prec);
-	}
+  if(pBit >= 0) {
+    prec++;
+    lval |= !!(pBit) << (8 - prec);
+    hval |= !!(pBit) << (8 - prec);
+  }
 
-	if(lval > val) {
-		lval -= step;
-		hval -= step;
-	}
+  if(lval > val) {
+    lval -= step;
+    hval -= step;
+  }
 
-	lval |= lval >> prec;
-	hval |= hval >> prec;
+  lval |= lval >> prec;
+  hval |= hval >> prec;
 
-	if(sad(val, lval) < sad(val, hval))
-		return lval;
-	else
-		return hval;
+  if(sad(val, lval) < sad(val, hval))
+    return lval;
+  else
+    return hval;
 */
 
 // !TODO! AVX2 supports an instruction known as vsllv, which shifts a vector
@@ -158,114 +158,114 @@ static const ALIGN_SSE uint32 kThirtyTwoVector[4] = { 32, 32, 32, 32 };
 static const __m128i kByteValMask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF);
 
 static inline __m128i sad(const __m128i &a, const __m128i &b) {
-	const __m128i maxab = _mm_max_epu8(a, b);
-	const __m128i minab = _mm_min_epu8(a, b);
-	return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) );
+  const __m128i maxab = _mm_max_epu8(a, b);
+  const __m128i minab = _mm_min_epu8(a, b);
+  return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) );
 }
 
 __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {
 
-	// !SPEED! We should figure out a way to get rid of these scalar operations.
+  // !SPEED! We should figure out a way to get rid of these scalar operations.
 #ifdef HAS_SSE_POPCNT
-	const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
+  const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
 #else
-	const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
+  const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
 #endif
-	
-	assert(r >= 0.0f && r <= 255.0f);
-	assert(g >= 0.0f && g <= 255.0f);
-	assert(b >= 0.0f && b <= 255.0f);
-	assert(a >= 0.0f && a <= 255.0f);
-	assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
-	assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
+  
+  assert(r >= 0.0f && r <= 255.0f);
+  assert(g >= 0.0f && g <= 255.0f);
+  assert(b >= 0.0f && b <= 255.0f);
+  assert(a >= 0.0f && a <= 255.0f);
+  assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
+  assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
 
-	const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
+  const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
 
-	const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
-	const __m128i &mask = qmask;
+  const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
+  const __m128i &mask = qmask;
 
-	__m128i lval = _mm_and_si128(val, mask);
-	__m128i hval = _mm_add_epi32(lval, step);
+  __m128i lval = _mm_and_si128(val, mask);
+  __m128i hval = _mm_add_epi32(lval, step);
 
-	const __m128i lvalShift = _mm_srli_epi32(lval, prec);
-	const __m128i hvalShift = _mm_srli_epi32(hval, prec);
+  const __m128i lvalShift = _mm_srli_epi32(lval, prec);
+  const __m128i hvalShift = _mm_srli_epi32(hval, prec);
 
-	lval = _mm_or_si128(lval, lvalShift);
-	hval = _mm_or_si128(hval, hvalShift);
+  lval = _mm_or_si128(lval, lvalShift);
+  hval = _mm_or_si128(hval, hvalShift);
 
-	const __m128i lvald = _mm_sub_epi32( val, lval );
-	const __m128i hvald = _mm_sub_epi32( hval, val );
+  const __m128i lvald = _mm_sub_epi32( val, lval );
+  const __m128i hvald = _mm_sub_epi32( hval, val );
 
-	const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
-	__m128i ans = _mm_blendv_epi8(hval, lval, vd);
+  const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
+  __m128i ans = _mm_blendv_epi8(hval, lval, vd);
 
-	const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
-	ans = _mm_blendv_epi8( ans, val, chanExact );
-	return ans;
+  const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
+  ans = _mm_blendv_epi8( ans, val, chanExact );
+  return ans;
 }
 
 __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
-	
-	// !SPEED! We should figure out a way to get rid of these scalar operations.
+  
+  // !SPEED! We should figure out a way to get rid of these scalar operations.
 #ifdef HAS_SSE_POPCNT
-	const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
+  const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
 #else
-	const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
+  const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
 #endif
-	
-	assert(r >= 0.0f && r <= 255.0f);
-	assert(g >= 0.0f && g <= 255.0f);
-	assert(b >= 0.0f && b <= 255.0f);
-	assert(a >= 0.0f && a <= 255.0f);
-	assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
-	assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
+  
+  assert(r >= 0.0f && r <= 255.0f);
+  assert(g >= 0.0f && g <= 255.0f);
+  assert(b >= 0.0f && b <= 255.0f);
+  assert(a >= 0.0f && a <= 255.0f);
+  assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
+  assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
 
-	const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
-	const __m128i pbit = _mm_set1_epi32(!!pBit);
+  const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
+  const __m128i pbit = _mm_set1_epi32(!!pBit);
 
-	const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask);
-	const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
+  const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask);
+  const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
 
-	__m128i lval = _mm_and_si128( val, mask );
-	__m128i hval = _mm_add_epi32( lval, step );
+  __m128i lval = _mm_and_si128( val, mask );
+  __m128i hval = _mm_add_epi32( lval, step );
 
-	const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec);
-	lval = _mm_or_si128(lval, pBitShifted );
-	hval = _mm_or_si128(hval, pBitShifted);
+  const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec);
+  lval = _mm_or_si128(lval, pBitShifted );
+  hval = _mm_or_si128(hval, pBitShifted);
 
-	// These next three lines we make sure that after adding the pbit that val is
-	// still in between lval and hval. If it isn't, then we subtract a
-	// step from both. Now, val should be larger than lval and less than
-	// hval, but certain situations make this not always the case (e.g. val
-	// is 0, precision is 4 bits, and pbit is 1). Hence, we add back the
-	// step if it goes below zero, making it equivalent to hval and so it
-	// doesn't matter which we choose.
-	{
-		__m128i cmp = _mm_cmpgt_epi32(lval, val);
-		cmp = _mm_mullo_epi32(cmp, step);
-		lval = _mm_add_epi32(lval, cmp);
-		hval = _mm_add_epi32(hval, cmp);
+  // In the block below we make sure that, after adding the pbit, val is
+  // still in between lval and hval. If it isn't, then we subtract a
+  // step from both. Now, val should be larger than lval and less than
+  // hval, but certain situations make this not always the case (e.g. val
+  // is 0, precision is 4 bits, and pbit is 1). Hence, we add back the
+  // step if it goes below zero, making it equivalent to hval and so it
+  // doesn't matter which we choose.
+  {
+    __m128i cmp = _mm_cmpgt_epi32(lval, val);
+    cmp = _mm_mullo_epi32(cmp, step);
+    lval = _mm_add_epi32(lval, cmp);
+    hval = _mm_add_epi32(hval, cmp);
 
-		cmp = _mm_cmplt_epi32(lval, kZeroVector);
-		cmp = _mm_mullo_epi32(cmp, step);
-		lval = _mm_sub_epi32(lval, cmp);
-	}
+    cmp = _mm_cmplt_epi32(lval, kZeroVector);
+    cmp = _mm_mullo_epi32(cmp, step);
+    lval = _mm_sub_epi32(lval, cmp);
+  }
 
-	const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1);
-	const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1);
+  const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1);
+  const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1);
 
-	lval = _mm_or_si128(lval, lvalShift);
-	hval = _mm_or_si128(hval, hvalShift);
+  lval = _mm_or_si128(lval, lvalShift);
+  hval = _mm_or_si128(hval, hvalShift);
 
-	const __m128i lvald = _mm_sub_epi32( val, lval );
-	const __m128i hvald = _mm_sub_epi32( hval, val );
+  const __m128i lvald = _mm_sub_epi32( val, lval );
+  const __m128i hvald = _mm_sub_epi32( hval, val );
 
-	const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
-	__m128i ans = _mm_blendv_epi8(hval, lval, vd);
+  const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
+  __m128i ans = _mm_blendv_epi8(hval, lval, vd);
 
-	const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
-	ans = _mm_blendv_epi8( ans, val, chanExact );
-	return ans;
+  const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
+  ans = _mm_blendv_epi8( ans, val, chanExact );
+  return ans;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -275,18 +275,18 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
 ///////////////////////////////////////////////////////////////////////////////
 
 RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const {
-	
-	__m128 xVec = _mm_set1_ps( p.x );
-	__m128 yVec = _mm_set1_ps( p.y );
-	__m128 zVec = _mm_set1_ps( p.z );
-	__m128 wVec = _mm_set1_ps( p.w );
+  
+  __m128 xVec = _mm_set1_ps( p.x );
+  __m128 yVec = _mm_set1_ps( p.y );
+  __m128 zVec = _mm_set1_ps( p.z );
+  __m128 wVec = _mm_set1_ps( p.w );
 
-	__m128 vec1 = _mm_mul_ps( xVec, col[0] );
-	__m128 vec2 = _mm_mul_ps( yVec, col[1] );
-	__m128 vec3 = _mm_mul_ps( zVec, col[2] );
-	__m128 vec4 = _mm_mul_ps( wVec, col[3] );
+  __m128 vec1 = _mm_mul_ps( xVec, col[0] );
+  __m128 vec2 = _mm_mul_ps( yVec, col[1] );
+  __m128 vec3 = _mm_mul_ps( zVec, col[2] );
+  __m128 vec4 = _mm_mul_ps( wVec, col[3] );
 
-	return RGBAVectorSIMD( _mm_add_ps( _mm_add_ps( vec1, vec2 ), _mm_add_ps( vec3, vec4 ) ) );
+  return RGBAVectorSIMD( _mm_add_ps( _mm_add_ps( vec1, vec2 ), _mm_add_ps( vec3, vec4 ) ) );
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -297,104 +297,104 @@ RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const {
 
 RGBAClusterSIMD::RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right) {
 
-	assert(!(left.m_PointBitString & right.m_PointBitString));
+  assert(!(left.m_PointBitString & right.m_PointBitString));
 
-	*this = left;
-	for(int i = 0; i < right.m_NumPoints; i++) {
+  *this = left;
+  for(int i = 0; i < right.m_NumPoints; i++) {
 
-		const RGBAVectorSIMD &p = right.m_DataPoints[i];
+    const RGBAVectorSIMD &p = right.m_DataPoints[i];
 
-		assert(m_NumPoints < kMaxNumDataPoints);
-		m_Total += p;
-		m_DataPoints[m_NumPoints++] = p;
+    assert(m_NumPoints < kMaxNumDataPoints);
+    m_Total += p;
+    m_DataPoints[m_NumPoints++] = p;
 
-		m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
-		m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
-	}
+    m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
+    m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
+  }
 
-	m_PointBitString = left.m_PointBitString | right.m_PointBitString;
-	m_PrincipalAxisCached = false;
-}	
+  m_PointBitString = left.m_PointBitString | right.m_PointBitString;
+  m_PrincipalAxisCached = false;
+}  
 
 void RGBAClusterSIMD::AddPoint(const RGBAVectorSIMD &p, int idx) {
-	assert(m_NumPoints < kMaxNumDataPoints);
-	m_Total += p;
-	m_DataPoints[m_NumPoints++] = p;
-	m_PointBitString |= 1 << idx;
+  assert(m_NumPoints < kMaxNumDataPoints);
+  m_Total += p;
+  m_DataPoints[m_NumPoints++] = p;
+  m_PointBitString |= 1 << idx;
 
-	m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
-	m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
+  m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
+  m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
 }
 
 float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2], __m128i *indices) const {
 
-	// nBuckets should be a power of two.
-	assert(!(nBuckets & (nBuckets - 1)));
+  // nBuckets should be a power of two.
+  assert(!(nBuckets & (nBuckets - 1)));
 
 #ifdef HAS_SSE_POPCNT
-	const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
+  const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
 #else
-	const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
+  const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
 #endif
-	assert(indexPrec >= 2 && indexPrec <= 4);
+  assert(indexPrec >= 2 && indexPrec <= 4);
 
-	typedef __m128i tInterpPair[2];
-	typedef tInterpPair tInterpLevel[16];
-	const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1);
+  typedef __m128i tInterpPair[2];
+  typedef tInterpPair tInterpLevel[16];
+  const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1);
 
-	__m128i qp1, qp2;
-	if(pbits) {
-		qp1 = p1.ToPixel(bitMask, pbits[0]);
-		qp2 = p2.ToPixel(bitMask, pbits[1]);
-	}
-	else {
-		qp1 = p1.ToPixel(bitMask);
-		qp2 = p2.ToPixel(bitMask);
-	}
+  __m128i qp1, qp2;
+  if(pbits) {
+    qp1 = p1.ToPixel(bitMask, pbits[0]);
+    qp2 = p2.ToPixel(bitMask, pbits[1]);
+  }
+  else {
+    qp1 = p1.ToPixel(bitMask);
+    qp2 = p2.ToPixel(bitMask);
+  }
 
-	__m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() );
+  __m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() );
 
-	__m128 totalError = kZero;
-	for(int i = 0; i < m_NumPoints; i++) {
+  __m128 totalError = kZero;
+  for(int i = 0; i < m_NumPoints; i++) {
 
-		const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask );
+    const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask );
 
-		__m128 minError = _mm_set1_ps(FLT_MAX);
-		__m128i bestBucket = _mm_set1_epi32(-1);
-		for(int j = 0; j < nBuckets; j++) {
+    __m128 minError = _mm_set1_ps(FLT_MAX);
+    __m128i bestBucket = _mm_set1_epi32(-1);
+    for(int j = 0; j < nBuckets; j++) {
 
-			const __m128i jVec = _mm_set1_epi32(j);
-			const __m128i interp0 = (*interpVals)[j][0];
-			const __m128i interp1 = (*interpVals)[j][1];
+      const __m128i jVec = _mm_set1_epi32(j);
+      const __m128i interp0 = (*interpVals)[j][0];
+      const __m128i interp1 = (*interpVals)[j][1];
 
-			const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 );
-			const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 );
-			const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) );
-			const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel );
-			__m128 errorVec = _mm_cvtepi32_ps( dist );
-			
-			errorVec = _mm_mul_ps( errorVec, errorMetricVec );
-			errorVec = _mm_mul_ps( errorVec, errorVec );
-			errorVec = _mm_hadd_ps( errorVec, errorVec );
-			errorVec = _mm_hadd_ps( errorVec, errorVec );
+      const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 );
+      const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 );
+      const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) );
+      const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel );
+      __m128 errorVec = _mm_cvtepi32_ps( dist );
+      
+      errorVec = _mm_mul_ps( errorVec, errorMetricVec );
+      errorVec = _mm_mul_ps( errorVec, errorVec );
+      errorVec = _mm_hadd_ps( errorVec, errorVec );
+      errorVec = _mm_hadd_ps( errorVec, errorVec );
 
-			const __m128 cmp = _mm_cmple_ps( errorVec, minError );
-			minError = _mm_blendv_ps( minError, errorVec, cmp );
-			bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) );
+      const __m128 cmp = _mm_cmple_ps( errorVec, minError );
+      minError = _mm_blendv_ps( minError, errorVec, cmp );
+      bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) );
 
-			// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
-			// farther away from the reference point along the line). Hence we can early out here.
-			// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
-			// about 0.01 RMS error. 
-			if(!((uint8 *)(&cmp))[0])
-				break;
-		}
+      // Conceptually, once the error starts growing, it doesn't stop growing (we're moving
+      // farther away from the reference point along the line). Hence we can early out here.
+      // However, quantization artifacts mean that this is not ALWAYS the case, so we do suffer
+      // about 0.01 RMS error. 
+      if(!((uint8 *)(&cmp))[0])
+        break;
+    }
 
-		totalError = _mm_add_ps(totalError, minError);
-		if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0];
-	}
+    totalError = _mm_add_ps(totalError, minError);
+    if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0];
+  }
 
-	return ((float *)(&totalError))[0];
+  return ((float *)(&totalError))[0];
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -404,69 +404,69 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector
 ///////////////////////////////////////////////////////////////////////////////
 
 void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2) {
-	p1.vec = _mm_min_ps( kByteMax, _mm_max_ps( p1.vec, kZero ) );
-	p2.vec = _mm_min_ps( kByteMax, _mm_max_ps( p2.vec, kZero ) );
+  p1.vec = _mm_min_ps( kByteMax, _mm_max_ps( p1.vec, kZero ) );
+  p2.vec = _mm_min_ps( kByteMax, _mm_max_ps( p2.vec, kZero ) );
 }
 
 void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis) {
 
-	if(c.GetNumPoints() == 2) {
-		axis = c.GetPoint(1) - c.GetPoint(0);
-		return;
-	}
+  if(c.GetNumPoints() == 2) {
+    axis = c.GetPoint(1) - c.GetPoint(0);
+    return;
+  }
 
-	RGBAVectorSIMD avg = c.GetTotal();
-	avg /= float(c.GetNumPoints());
+  RGBAVectorSIMD avg = c.GetTotal();
+  avg /= float(c.GetNumPoints());
 
-	// We use these vectors for calculating the covariance matrix...
-	RGBAVectorSIMD toPts[kMaxNumDataPoints];
-	RGBAVectorSIMD toPtsMax(-FLT_MAX);
-	for(int i = 0; i < c.GetNumPoints(); i++) {
-		toPts[i] = c.GetPoint(i) - avg;
-		toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec);
-	}
+  // We use these vectors for calculating the covariance matrix...
+  RGBAVectorSIMD toPts[kMaxNumDataPoints];
+  RGBAVectorSIMD toPtsMax(-FLT_MAX);
+  for(int i = 0; i < c.GetNumPoints(); i++) {
+    toPts[i] = c.GetPoint(i) - avg;
+    toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec);
+  }
 
-	RGBAMatrixSIMD covMatrix;
+  RGBAMatrixSIMD covMatrix;
 
-	// Compute covariance.
-	const float fNumPoints = float(c.GetNumPoints());
-	for(int i = 0; i < kNumColorChannels; i++) {
-		for(int j = 0; j <= i; j++) {
+  // Compute covariance.
+  const float fNumPoints = float(c.GetNumPoints());
+  for(int i = 0; i < kNumColorChannels; i++) {
+    for(int j = 0; j <= i; j++) {
 
-			float sum = 0.0;
-			for(int k = 0; k < c.GetNumPoints(); k++) {
-				sum += toPts[k].c[i] * toPts[k].c[j];
-			}
+      float sum = 0.0;
+      for(int k = 0; k < c.GetNumPoints(); k++) {
+        sum += toPts[k].c[i] * toPts[k].c[j];
+      }
 
-			covMatrix(i, j) = sum / fNumPoints;
-			covMatrix(j, i) = covMatrix(i, j);
-		}
-	}
+      covMatrix(i, j) = sum / fNumPoints;
+      covMatrix(j, i) = covMatrix(i, j);
+    }
+  }
 
-	// !SPEED! Find eigenvectors by using the power method. This is good because the
-	// matrix is only 4x4, which allows us to use SIMD...
-	RGBAVectorSIMD b = toPtsMax;
-	assert(b.Length() > 0);
-	b /= b.Length();
+  // !SPEED! Find eigenvectors by using the power method. This is good because the
+  // matrix is only 4x4, which allows us to use SIMD...
+  RGBAVectorSIMD b = toPtsMax;
+  assert(b.Length() > 0);
+  b /= b.Length();
 
-	RGBAVectorSIMD newB = covMatrix * b;
+  RGBAVectorSIMD newB = covMatrix * b;
 
-	// !HACK! If the principal eigenvector of the covariance matrix
-	// converges to zero, that means that the points lie equally 
-	// spaced on a sphere in this space. In this (extremely rare)
-	// situation, just choose a point and use it as the principal 
-	// direction.
-	const float newBlen = newB.Length();
-	if(newBlen < 1e-10) {
-		axis = toPts[0];
-		return;
-	}
+  // !HACK! If the principal eigenvector of the covariance matrix
+  // converges to zero, that means that the points lie equally 
+  // spaced on a sphere in this space. In this (extremely rare)
+  // situation, just choose a point and use it as the principal 
+  // direction.
+  const float newBlen = newB.Length();
+  if(newBlen < 1e-10) {
+    axis = toPts[0];
+    return;
+  }
 
-	for(int i = 0; i < 8; i++) {
-		newB = covMatrix * b;
-		newB.Normalize();
-		b = newB;
-	}
+  for(int i = 0; i < 8; i++) {
+    newB = covMatrix * b;
+    newB.Normalize();
+    b = newB;
+  }
 
-	axis = b;
+  axis = b;
 }
diff --git a/BPTCEncoder/src/RGBAEndpointsSIMD.h b/BPTCEncoder/src/RGBAEndpointsSIMD.h
index a83176f..6288c7c 100755
--- a/BPTCEncoder/src/RGBAEndpointsSIMD.h
+++ b/BPTCEncoder/src/RGBAEndpointsSIMD.h
@@ -81,270 +81,270 @@ static const __m128 kEpsilonSIMD = _mm_set1_ps(1e-8f);
 class RGBAVectorSIMD {
 
 public:
-	union {
-		struct { float r, g, b, a; };
-		struct { float x, y, z, w; };
-		float c[4];
-		__m128 vec;
-	};
+  union {
+    struct { float r, g, b, a; };
+    struct { float x, y, z, w; };
+    float c[4];
+    __m128 vec;
+  };
 
-	RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
-	RGBAVectorSIMD(uint32 pixel) : 
-		r(float(pixel & 0xFF)), 
-		g(float((pixel >> 8) & 0xFF)), 
-		b(float((pixel >> 16) & 0xFF)), 
-		a(float((pixel >> 24) & 0xFF))
-	{ }
+  RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
+  RGBAVectorSIMD(uint32 pixel) : 
+    r(float(pixel & 0xFF)), 
+    g(float((pixel >> 8) & 0xFF)), 
+    b(float((pixel >> 16) & 0xFF)), 
+    a(float((pixel >> 24) & 0xFF))
+  { }
 
-	explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) :
-		r(_r), g(_g), b(_b), a(_a) { }
+  explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) :
+    r(_r), g(_g), b(_b), a(_a) { }
 
-	explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { }
+  explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { }
 
-	RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { }
-	RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { }
+  RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { }
+  RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { }
 
-	RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const {
-		return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) );
-	}
+  RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const {
+    return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) );
+  }
 
-	RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) {
-		this->vec = _mm_add_ps(this->vec, p.vec);
-		return *this;
-	}
+  RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) {
+    this->vec = _mm_add_ps(this->vec, p.vec);
+    return *this;
+  }
 
-	RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const {
-		return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) );
-	}
+  RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const {
+    return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) );
+  }
 
-	RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) {
-		this->vec = _mm_sub_ps(this->vec, p.vec);
-		return *this;
-	}
+  RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) {
+    this->vec = _mm_sub_ps(this->vec, p.vec);
+    return *this;
+  }
 
-	RGBAVectorSIMD operator /(const float s) const {
-		return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) );
-	}
+  RGBAVectorSIMD operator /(const float s) const {
+    return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) );
+  }
 
-	RGBAVectorSIMD &operator /=(const float s) {
-		this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) );
-		return *this;
-	}
+  RGBAVectorSIMD &operator /=(const float s) {
+    this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) );
+    return *this;
+  }
 
-	float operator *(const RGBAVectorSIMD &p) const {
-		__m128 mul = _mm_mul_ps(this->vec, p.vec);
-		mul = _mm_hadd_ps(mul, mul);
-		mul = _mm_hadd_ps(mul, mul);
-		return ((float *)(&mul))[0];
-	}
+  float operator *(const RGBAVectorSIMD &p) const {
+    __m128 mul = _mm_mul_ps(this->vec, p.vec);
+    mul = _mm_hadd_ps(mul, mul);
+    mul = _mm_hadd_ps(mul, mul);
+    return ((float *)(&mul))[0];
+  }
 
-	void Normalize() {
-		__m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) );
-		vec = _mm_mul_ps( vec, rsqrt );
-	}
+  void Normalize() {
+    __m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) );
+    vec = _mm_mul_ps( vec, rsqrt );
+  }
 
-	float Length() const {
-		return sqrt((*this) * (*this));
-	}
+  float Length() const {
+    return sqrt((*this) * (*this));
+  }
 
-	RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) {
-		this->vec = _mm_mul_ps(this->vec, v.vec);
-		return *this;
-	}
+  RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) {
+    this->vec = _mm_mul_ps(this->vec, v.vec);
+    return *this;
+  }
 
-	RGBAVectorSIMD operator *(const float s) const {
-		return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) );
-	}
+  RGBAVectorSIMD operator *(const float s) const {
+    return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) );
+  }
 
-	friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) {
-		return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) );
-	}
+  friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) {
+    return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) );
+  }
 
-	RGBAVectorSIMD &operator *=(const float s) {
-		this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) );
-		return *this;
-	}
+  RGBAVectorSIMD &operator *=(const float s) {
+    this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) );
+    return *this;
+  }
 
-	float &operator [](const int i) {
-		return c[i];
-	}
+  float &operator [](const int i) {
+    return c[i];
+  }
 
-	friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
-		__m128 d = _mm_sub_ps(rhs.vec, lhs.vec);
-		d = _mm_mul_ps(d, d);
-		__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
-		cmp = _mm_hadd_ps(cmp, cmp);
-		cmp = _mm_hadd_ps(cmp, cmp);
-		return ((float *)(&cmp))[0] == 0.0f;
-	}
+  friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
+    __m128 d = _mm_sub_ps(rhs.vec, lhs.vec);
+    d = _mm_mul_ps(d, d);
+    __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
+    cmp = _mm_hadd_ps(cmp, cmp);
+    cmp = _mm_hadd_ps(cmp, cmp);
+    return ((float *)(&cmp))[0] == 0.0f;
+  }
 
-	friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
-		return !(rhs == lhs);
-	}
+  friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
+    return !(rhs == lhs);
+  }
 
-	operator float *() {
-		return c;
-	}
+  operator float *() {
+    return c;
+  }
 
-	// Quantize this point.
-	__m128i ToPixel(const __m128i &channelMask, const int pBit) const;
-	__m128i ToPixel(const __m128i &channelMask) const;
+  // Quantize this point.
+  __m128i ToPixel(const __m128i &channelMask, const int pBit) const;
+  __m128i ToPixel(const __m128i &channelMask) const;
 };
 
 class RGBAMatrixSIMD {
 private:
-	union {
-		float m[kNumColorChannels*kNumColorChannels];
-		struct {
-			float m1, m5, m9, m13;
-			float m2, m6, m10, m14;
-			float m3, m7, m11, m15;
-			float m4, m8, m12, m16;
-		};
-		__m128 col[kNumColorChannels];
-	};
+  union {
+    float m[kNumColorChannels*kNumColorChannels];
+    struct {
+      float m1, m5, m9, m13;
+      float m2, m6, m10, m14;
+      float m3, m7, m11, m15;
+      float m4, m8, m12, m16;
+    };
+    __m128 col[kNumColorChannels];
+  };
 
-	RGBAMatrixSIMD(const float *arr) {
-		memcpy(m, arr, sizeof(m));
-	}
+  RGBAMatrixSIMD(const float *arr) {
+    memcpy(m, arr, sizeof(m));
+  }
 
-	RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) {
-		for(int i = 0; i < kNumColorChannels; i++) 
-			col[i] = newcol[i];
-	}
+  RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) {
+    for(int i = 0; i < kNumColorChannels; i++) 
+      col[i] = newcol[i];
+  }
 
 public:
-	
-	RGBAMatrixSIMD() : 
-		m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
-		m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
-		m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
-		m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
-	{ }
+  
+  RGBAMatrixSIMD() : 
+    m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
+    m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
+    m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
+    m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
+  { }
 
-	RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) {
-		memcpy(m, other.m, sizeof(m));
-		return (*this);
-	}
+  RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) {
+    memcpy(m, other.m, sizeof(m));
+    return (*this);
+  }
 
-	RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const {
-		RGBAMatrixSIMD newm;
-		for(int i = 0; i < kNumColorChannels; i++) {
-			newm.col[i] = _mm_add_ps(col[i], p.col[i]);
-		}
-		return newm;
-	}
+  RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const {
+    RGBAMatrixSIMD newm;
+    for(int i = 0; i < kNumColorChannels; i++) {
+      newm.col[i] = _mm_add_ps(col[i], p.col[i]);
+    }
+    return newm;
+  }
 
-	RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) {
-		for(int i = 0; i < kNumColorChannels; i++) {
-			col[i] = _mm_add_ps( col[i], p.col[i] );
-		}
-		return *this;
-	}
+  RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) {
+    for(int i = 0; i < kNumColorChannels; i++) {
+      col[i] = _mm_add_ps( col[i], p.col[i] );
+    }
+    return *this;
+  }
 
-	RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const {
-		RGBAMatrixSIMD newm;
-		for(int i = 0; i < kNumColorChannels; i++) {
-			newm.col[i] = _mm_sub_ps( col[i], p.col[i] );
-		}
-		return newm;
-	}
+  RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const {
+    RGBAMatrixSIMD newm;
+    for(int i = 0; i < kNumColorChannels; i++) {
+      newm.col[i] = _mm_sub_ps( col[i], p.col[i] );
+    }
+    return newm;
+  }
 
-	RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) {
-		for(int i = 0; i < kNumColorChannels; i++) {
-			col[i] = _mm_sub_ps( col[i], p.col[i] );
-		}
-		return *this;
-	}
+  RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) {
+    for(int i = 0; i < kNumColorChannels; i++) {
+      col[i] = _mm_sub_ps( col[i], p.col[i] );
+    }
+    return *this;
+  }
 
-	RGBAMatrixSIMD operator /(const float s) const {
-		__m128 f = _mm_set1_ps(s);
-		RGBAMatrixSIMD newm;
+  RGBAMatrixSIMD operator /(const float s) const {
+    __m128 f = _mm_set1_ps(s);
+    RGBAMatrixSIMD newm;
 
-		for(int i = 0; i < kNumColorChannels; i++) {
-			newm.col[i] = _mm_div_ps( col[i], f );
-		}
+    for(int i = 0; i < kNumColorChannels; i++) {
+      newm.col[i] = _mm_div_ps( col[i], f );
+    }
 
-		return newm;
-	}
+    return newm;
+  }
 
-	RGBAMatrixSIMD &operator /=(const float s) {
+  RGBAMatrixSIMD &operator /=(const float s) {
 
-		__m128 f = _mm_set1_ps(s);
+    __m128 f = _mm_set1_ps(s);
 
-		for(int i = 0; i < kNumColorChannels; i++) {
-			col[i] = _mm_div_ps(col[i], f);
-		}
+    for(int i = 0; i < kNumColorChannels; i++) {
+      col[i] = _mm_div_ps(col[i], f);
+    }
 
-		return *this;
-	}
+    return *this;
+  }
 
-	RGBAMatrixSIMD operator *(const float s) const {
-		__m128 f = _mm_set1_ps(s);
+  RGBAMatrixSIMD operator *(const float s) const {
+    __m128 f = _mm_set1_ps(s);
 
-		RGBAMatrixSIMD newm;
-		for(int i = 0; i < kNumColorChannels; i++) {
-			newm.col[i] = _mm_mul_ps( col[i], f );
-		}
-		return newm;
-	}
+    RGBAMatrixSIMD newm;
+    for(int i = 0; i < kNumColorChannels; i++) {
+      newm.col[i] = _mm_mul_ps( col[i], f );
+    }
+    return newm;
+  }
 
-	friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) {
-		__m128 f = _mm_set1_ps(s);
-		RGBAMatrixSIMD newm;
+  friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) {
+    __m128 f = _mm_set1_ps(s);
+    RGBAMatrixSIMD newm;
 
-		for(int i = 0; i < kNumColorChannels; i++) {
-			newm.col[i] = _mm_mul_ps( p.col[i], f );
-		}
-		return newm;
-	}
+    for(int i = 0; i < kNumColorChannels; i++) {
+      newm.col[i] = _mm_mul_ps( p.col[i], f );
+    }
+    return newm;
+  }
 
-	RGBAMatrixSIMD &operator *=(const float s) {
-		__m128 f = _mm_set1_ps(s);
-		for(int i = 0; i < kNumColorChannels; i++) 
-			col[i] = _mm_mul_ps(col[i], f);
-		return *this;
-	}
+  RGBAMatrixSIMD &operator *=(const float s) {
+    __m128 f = _mm_set1_ps(s);
+    for(int i = 0; i < kNumColorChannels; i++) 
+      col[i] = _mm_mul_ps(col[i], f);
+    return *this;
+  }
 
-	float &operator ()(const int i, const int j) {
-		return (*this)[j*4 + i];
-	}
+  float &operator ()(const int i, const int j) {
+    return (*this)[j*4 + i];
+  }
 
-	float &operator [](const int i) {
-		return m[i];
-	}
+  float &operator [](const int i) {
+    return m[i];
+  }
 
-	friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) {
-		
-		__m128 sum = _mm_set1_ps(0.0f);
-		for(int i = 0; i < kNumColorChannels; i++) {
-			__m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]);
-			d = _mm_mul_ps(d, d);
-			__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
-			cmp = _mm_hadd_ps(cmp, cmp);
-			cmp = _mm_hadd_ps(cmp, cmp);
-			sum = _mm_add_ps(sum, cmp);
-		}
+  friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) {
+    
+    __m128 sum = _mm_set1_ps(0.0f);
+    for(int i = 0; i < kNumColorChannels; i++) {
+      __m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]);
+      d = _mm_mul_ps(d, d);
+      __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
+      cmp = _mm_hadd_ps(cmp, cmp);
+      cmp = _mm_hadd_ps(cmp, cmp);
+      sum = _mm_add_ps(sum, cmp);
+    }
 
-		if(((float *)(&sum))[0] != 0)
-			return false;
-		else
-			return true;
-	}
+    if(((float *)(&sum))[0] != 0)
+      return false;
+    else
+      return true;
+  }
 
-	operator float *() {
-		return m;
-	}
+  operator float *() {
+    return m;
+  }
 
-	RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const;
+  RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const;
 };
 
 class RGBADirSIMD : public RGBAVectorSIMD {
 public:
-	RGBADirSIMD() : RGBAVectorSIMD() { }
-	RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) {
-		this->Normalize();
-	}
+  RGBADirSIMD() : RGBAVectorSIMD() { }
+  RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) {
+    this->Normalize();
+  }
 };
 
 // Makes sure that the values of the endpoints lie between 0 and 1.
@@ -353,69 +353,69 @@ extern void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2);
 class RGBAClusterSIMD {
 public:
 
-	RGBAClusterSIMD() : 
-	  m_NumPoints(0), m_Total(0.0f), 
-	  m_PointBitString(0),
-	  m_Min(FLT_MAX),
-	  m_Max(-FLT_MAX),
-	  m_PrincipalAxisCached(false)
-	{ } 
+  RGBAClusterSIMD() : 
+    m_NumPoints(0), m_Total(0.0f), 
+    m_PointBitString(0),
+    m_Min(FLT_MAX),
+    m_Max(-FLT_MAX),
+    m_PrincipalAxisCached(false)
+  { } 
 
-	RGBAClusterSIMD(const RGBAClusterSIMD &c) : 
-		m_NumPoints(c.m_NumPoints),
-		m_Total(c.m_Total),
-		m_PointBitString(c.m_PointBitString), 
-		m_Min(c.m_Min),
-		m_Max(c.m_Max),
-		m_PrincipalAxisCached(false)
-	{ 
-		memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD));
-	}
+  RGBAClusterSIMD(const RGBAClusterSIMD &c) : 
+    m_NumPoints(c.m_NumPoints),
+    m_Total(c.m_Total),
+    m_PointBitString(c.m_PointBitString), 
+    m_Min(c.m_Min),
+    m_Max(c.m_Max),
+    m_PrincipalAxisCached(false)
+  { 
+    memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD));
+  }
 
-	RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right);
-	RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) : 
-		m_NumPoints(1),
-		m_Total(p),
-		m_PointBitString(0),
-		m_Min(p), m_Max(p),
-		m_PrincipalAxisCached(false)
-	{ 
-		m_DataPoints[0] = p;
-		m_PointBitString |= (1 << idx);
-	}
-			
-	RGBAVectorSIMD GetTotal() const { return m_Total; }
-	const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; }
-	int GetNumPoints() const { return m_NumPoints; }
-	RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); }
+  RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right);
+  RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) : 
+    m_NumPoints(1),
+    m_Total(p),
+    m_PointBitString(0),
+    m_Min(p), m_Max(p),
+    m_PrincipalAxisCached(false)
+  { 
+    m_DataPoints[0] = p;
+    m_PointBitString |= (1 << idx);
+  }
+      
+  RGBAVectorSIMD GetTotal() const { return m_Total; }
+  const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; }
+  int GetNumPoints() const { return m_NumPoints; }
+  RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); }
 
-	void AddPoint(const RGBAVectorSIMD &p, int idx);
+  void AddPoint(const RGBAVectorSIMD &p, int idx);
 
-	void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const {
-		Min = m_Min, Max = m_Max;
-	}
+  void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const {
+    Min = m_Min, Max = m_Max;
+  }
 
-	// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
-	float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const;
+  // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
+  float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const;
 
-	bool AllSamePoint() const { return m_Max == m_Min; }
-	int GetPointBitString() const { return m_PointBitString; }
+  bool AllSamePoint() const { return m_Max == m_Min; }
+  int GetPointBitString() const { return m_PointBitString; }
 
 private:
 
-	// The number of points in the cluster.
-	int m_NumPoints;
+  // The number of points in the cluster.
+  int m_NumPoints;
 
-	RGBAVectorSIMD m_Total;
+  RGBAVectorSIMD m_Total;
 
-	// The points in the cluster.
-	RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints];
+  // The points in the cluster.
+  RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints];
 
-	RGBAVectorSIMD m_Min, m_Max;
-	int m_PointBitString;
+  RGBAVectorSIMD m_Min, m_Max;
+  int m_PointBitString;
 
-	RGBADirSIMD m_PrincipalAxis;
-	bool m_PrincipalAxisCached;
+  RGBADirSIMD m_PrincipalAxis;
+  bool m_PrincipalAxisCached;
 };
 
 extern void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis);
diff --git a/CLTool/src/clwin32.cpp b/CLTool/src/clwin32.cpp
index 62b51c4..d746a22 100644
--- a/CLTool/src/clwin32.cpp
+++ b/CLTool/src/clwin32.cpp
@@ -64,18 +64,18 @@ void PrintUsage() {
 }
 
 void ExtractBasename(const char *filename, char *buf, uint32 bufSz) {
-	size_t len = strlen(filename);
-	const char *end = filename + len;
-	while(--end != filename) {
-		if(*end == '.')
-		{
-			uint32 numChars = int32(end - filename + 1);
-			uint32 toCopy = (numChars > bufSz)? bufSz : numChars;
-			memcpy(buf, filename, toCopy);
-			buf[toCopy - 1] = '\0';
-			return;
-		}
-	}
+  size_t len = strlen(filename);
+  const char *end = filename + len;
+  while(--end != filename) {
+    if(*end == '.')
+    {
+      uint32 numChars = int32(end - filename + 1);
+      uint32 toCopy = (numChars > bufSz)? bufSz : numChars;
+      memcpy(buf, filename, toCopy);
+      buf[toCopy - 1] = '\0';
+      return;
+    }
+  }
 }
 
 int _tmain(int argc, _TCHAR* argv[])
@@ -175,7 +175,7 @@ int _tmain(int argc, _TCHAR* argv[])
   if(numThreads > 1 && bSaveLog) {
     bSaveLog = false;
     fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
-	    "If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
+      "If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
   }
 
   if(fileArg == argc) {
@@ -183,16 +183,16 @@ int _tmain(int argc, _TCHAR* argv[])
     exit(1);
   }
 
-	char basename[256];
-	ExtractBasename(argv[fileArg], basename, 256);
+  char basename[256];
+  ExtractBasename(argv[fileArg], basename, 256);
 
   ImageFile file (argv[fileArg]);
-	if(!file.Load()) {
+  if(!file.Load()) {
     fprintf(stderr, "Error loading file: %s\n", argv[fileArg]);
     return 1;
-	}
+  }
 
-	const Image *img = file.GetImage();
+  const Image *img = file.GetImage();
 
   int numBlocks = (img->GetWidth() * img->GetHeight())/16;
   BlockStatManager *statManager = NULL;
@@ -224,14 +224,14 @@ int _tmain(int argc, _TCHAR* argv[])
   }
 
   if(bSaveLog) {
-	  strcat_s(basename, ".log");
+    strcat_s(basename, ".log");
     statManager->ToFile(basename);
-	basename[strlen(basename) - 4] = '\0';
+    basename[strlen(basename) - 4] = '\0';
   }
   strcat_s(basename, "-bc7.png");
-	Image cImg (*ci);
-	ImageFile cImgFile (basename, eFileFormat_PNG, cImg);
-	cImgFile.Write();
+  Image cImg (*ci);
+  ImageFile cImgFile (basename, eFileFormat_PNG, cImg);
+  cImgFile.Write();
 
   // Cleanup 
   delete ci;
diff --git a/Core/include/Image.h b/Core/include/Image.h
index 3de33b9..7086916 100644
--- a/Core/include/Image.h
+++ b/Core/include/Image.h
@@ -54,7 +54,7 @@ class ImageLoader;
 class Image {
 
  public:
-	Image(const CompressedImage &);
+  Image(const CompressedImage &);
   Image(const ImageLoader &);
   ~Image();
   
diff --git a/Core/src/BlockStats.cpp b/Core/src/BlockStats.cpp
index 2bd93dd..89de020 100644
--- a/Core/src/BlockStats.cpp
+++ b/Core/src/BlockStats.cpp
@@ -165,8 +165,8 @@ BlockStatManager::~BlockStatManager() {
 
   if(m_Mutex)
   {
-	delete m_Mutex;
-	m_Mutex = 0;
+    delete m_Mutex;
+    m_Mutex = 0;
   }
 }
 
@@ -206,15 +206,15 @@ void BlockStatManager::ToFile(const CHAR *filename) {
 
       CHAR str[256];
 #ifdef _MSC_VER
-	  _sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr);
+      _sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr);
 #else
       snprintf(str, 256, "%d,%s\n", i, statStr);
 #endif
       
       uint32 strLen = uint32(strlen(str));
       if(strLen > 255) {
-	    str[255] = '\n';
-	    strLen = 256;
+        str[255] = '\n';
+        strLen = 256;
       }
  
       fstr.Write((uint8 *)str, strLen);
diff --git a/Core/src/CompressedImage.cpp b/Core/src/CompressedImage.cpp
index 7250840..0f4f393 100644
--- a/Core/src/CompressedImage.cpp
+++ b/Core/src/CompressedImage.cpp
@@ -70,16 +70,16 @@ CompressedImage::CompressedImage( const CompressedImage &other )
 }
 
 CompressedImage::CompressedImage(
-  const unsigned int width,				 
+  const unsigned int width,
   const unsigned int height,
   const ECompressionFormat format,
   const unsigned char *data
 ) 
-: m_Width(width)
-, m_Height(height)
-, m_Format(format)
-, m_Data(0)
-, m_DataSz(0)
+  : m_Width(width)
+  , m_Height(height)
+  , m_Format(format)
+  , m_Data(0)
+  , m_DataSz(0)
 {
   InitData(data);
 }
@@ -94,7 +94,7 @@ void CompressedImage::InitData(const unsigned char *withData) {
     case eCompressionFormat_DXT5: m_DataSz = uncompDataSz / 4; break;
     case eCompressionFormat_BPTC: m_DataSz = uncompDataSz / 4; break;
   }
-  
+
   if(m_DataSz > 0) {
     m_Data = new unsigned char[m_DataSz];
     memcpy(m_Data, withData, m_DataSz);
diff --git a/Core/src/Image.cpp b/Core/src/Image.cpp
index b0e67f6..f507186 100644
--- a/Core/src/Image.cpp
+++ b/Core/src/Image.cpp
@@ -95,14 +95,14 @@ Image::Image(const CompressedImage &ci)
   : m_Width(ci.GetWidth())
   , m_Height(ci.GetHeight())
 {
-	unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4;
-	m_PixelData = new uint8[ bufSz ];
-	if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; }
+  unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4;
+  m_PixelData = new uint8[ bufSz ];
+  if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; }
 
-	if(!ci.DecompressImage(m_PixelData, bufSz)) {
-		fprintf(stderr, "Error decompressing image!\n");
-		return;
-	}
+  if(!ci.DecompressImage(m_PixelData, bufSz)) {
+    fprintf(stderr, "Error decompressing image!\n");
+    return;
+  }
 }
 
 Image::Image(const ImageLoader &loader) 
diff --git a/Core/src/StopWatch.h b/Core/src/StopWatch.h
index 53d939d..a31dd73 100755
--- a/Core/src/StopWatch.h
+++ b/Core/src/StopWatch.h
@@ -74,23 +74,23 @@ class StopWatchImpl;
 class StopWatch
 {
 public:
-	StopWatch();
-	StopWatch(const StopWatch &);
+  StopWatch();
+  StopWatch(const StopWatch &);
 
-	~StopWatch();
+  ~StopWatch();
 
-	StopWatch &operator=(const StopWatch &);
+  StopWatch &operator=(const StopWatch &);
 
-	void Start();
-	void Stop();
-	void Reset();
+  void Start();
+  void Stop();
+  void Reset();
 
-	double TimeInSeconds() const;
-	double TimeInMilliseconds() const;
-	double TimeInMicroseconds() const;
+  double TimeInSeconds() const;
+  double TimeInMilliseconds() const;
+  double TimeInMicroseconds() const;
 
 private:
-	StopWatchImpl *impl;
+  StopWatchImpl *impl;
 };
 
 #endif // __TEXCOMP_STOP_WATCH_H__
diff --git a/Core/src/StopWatchOSX.cpp b/Core/src/StopWatchOSX.cpp
index 50f6363..063828a 100644
--- a/Core/src/StopWatchOSX.cpp
+++ b/Core/src/StopWatchOSX.cpp
@@ -101,7 +101,7 @@ double StopWatch::TimeInSeconds() const {
 double StopWatch::TimeInMilliseconds() const {
   return double(impl->duration) / 1e3;
 }
-	
+
 double StopWatch::TimeInMicroseconds() const {
   return double(impl->duration);
 }
diff --git a/Core/src/StopWatchUnix.cpp b/Core/src/StopWatchUnix.cpp
index 3694b68..4b17f6b 100644
--- a/Core/src/StopWatchUnix.cpp
+++ b/Core/src/StopWatchUnix.cpp
@@ -100,7 +100,7 @@ double StopWatch::TimeInSeconds() const {
 double StopWatch::TimeInMilliseconds() const {
   return impl->duration * 1000;
 }
-	
+
 double StopWatch::TimeInMicroseconds() const {
   return impl->duration * 1000000;
 }
diff --git a/Core/src/TexComp.cpp b/Core/src/TexComp.cpp
index 1621fb5..3cf0bf5 100644
--- a/Core/src/TexComp.cpp
+++ b/Core/src/TexComp.cpp
@@ -404,5 +404,5 @@ bool CompressImageData(
 }
 
 void YieldThread() {
-	TCThread::Yield();
+  TCThread::Yield();
 }
diff --git a/Core/src/ThreadGroup.cpp b/Core/src/ThreadGroup.cpp
index 33a58eb..49be9f5 100644
--- a/Core/src/ThreadGroup.cpp
+++ b/Core/src/ThreadGroup.cpp
@@ -115,7 +115,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
   , m_CompressedBlockSize(
        (func == BC7C::Compress 
 #ifdef HAS_SSE_41
-	|| func == BC7C::CompressImageBC7SIMD
+        || func == BC7C::CompressImageBC7SIMD
 #endif
        )? 
          16 
@@ -125,7 +125,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
   , m_UncompressedBlockSize(
        (func == BC7C::Compress 
 #ifdef HAS_SSE_41
-	|| func == BC7C::CompressImageBC7SIMD
+        || func == BC7C::CompressImageBC7SIMD
 #endif
        )? 
          64 
diff --git a/Core/src/WorkerQueue.cpp b/Core/src/WorkerQueue.cpp
index 7f74e56..f99499f 100644
--- a/Core/src/WorkerQueue.cpp
+++ b/Core/src/WorkerQueue.cpp
@@ -81,40 +81,39 @@ void WorkerThread::operator()() {
   bool quitFlag = false;
   while(!quitFlag) {
     
-    switch(m_Parent->AcceptThreadData(m_ThreadIdx)) 
-    {
+    switch(m_Parent->AcceptThreadData(m_ThreadIdx)) {
 
       case eAction_Quit:
       {
-	quitFlag = true;
-	break;
+        quitFlag = true;
+        break;
       }
 
       case eAction_Wait:
       {
-	TCThread::Yield();
-	break;
+        TCThread::Yield();
+        break;
       }
 
       case eAction_DoWork:
       {
-	const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
-	uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
+        const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
+        uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
 
-  CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
-	if(f)
-	  (*f)(cj);
-	else
-	  (*fStat)(cj, *statManager);
+        CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
+        if(f)
+          (*f)(cj);
+        else
+          (*fStat)(cj, *statManager);
 
-	break;
+        break;
       }
 
       default:
       {
-	fprintf(stderr, "Unrecognized thread command!\n");
-	quitFlag = true;
-	break;
+        fprintf(stderr, "Unrecognized thread command!\n");
+        quitFlag = true;
+        break;
       }
     }
   }
@@ -244,10 +243,10 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
   if(m_NextBlock == totalBlocks) {
     if(m_NumCompressions < m_TotalNumCompressions) {
       if(++m_WaitingThreads == m_ActiveThreads) {
-	m_NextBlock = 0;
-	m_WaitingThreads = 0;
+        m_NextBlock = 0;
+        m_WaitingThreads = 0;
       } else {
-	return WorkerThread::eAction_Wait;
+        return WorkerThread::eAction_Wait;
       }
     }
     else {
diff --git a/IO/src/FileStreamWin32.cpp b/IO/src/FileStreamWin32.cpp
index e51eb8a..a9e1581 100755
--- a/IO/src/FileStreamWin32.cpp
+++ b/IO/src/FileStreamWin32.cpp
@@ -1,3 +1,55 @@
+/* FasTC
+ * Copyright (c) 2012 University of North Carolina at Chapel Hill.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for educational, research, and non-profit purposes, without
+ * fee, and without a written agreement is hereby granted, provided that the
+ * above copyright notice, this paragraph, and the following four paragraphs
+ * appear in all copies.
+ *
+ * Permission to incorporate this software into commercial products may be
+ * obtained by contacting the authors or the Office of Technology Development
+ * at the University of North Carolina at Chapel Hill <otd@unc.edu>.
+ *
+ * This software program and documentation are copyrighted by the University of
+ * North Carolina at Chapel Hill. The software program and documentation are
+ * supplied "as is," without any accompanying services from the University of
+ * North Carolina at Chapel Hill or the authors. The University of North
+ * Carolina at Chapel Hill and the authors do not warrant that the operation of
+ * the program will be uninterrupted or error-free. The end-user understands
+ * that the program was developed for research purposes and is advised not to
+ * rely exclusively on the program for any reason.
+ *
+ * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
+ * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
+ * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
+ * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
+ * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
+ * AN "AS IS" BASIS, AND THE UNIVERSITY  OF NORTH CAROLINA AT CHAPEL HILL AND
+ * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
+ * ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Please send all BUG REPORTS to <pavel@cs.unc.edu>.
+ *
+ * The authors may be contacted via:
+ *
+ * Pavel Krajcevski
+ * Dept of Computer Science
+ * 201 S Columbia St
+ * Frederick P. Brooks, Jr. Computer Science Bldg
+ * Chapel Hill, NC 27599-3175
+ * USA
+ * 
+ * <http://gamma.cs.unc.edu/FasTC/>
+ */
+
 #include "FileStream.h"
 
 #include <Windows.h>
@@ -54,7 +106,7 @@ public:
     : m_ReferenceCount(1)
   {
 
-	DWORD dwDesiredAccess = GENERIC_READ;
+  DWORD dwDesiredAccess = GENERIC_READ;
   DWORD dwOpenAction = OPEN_EXISTING;
   switch(mode) {
     default:
@@ -71,13 +123,13 @@ public:
 
     case eFileMode_WriteAppend:
     case eFileMode_WriteBinaryAppend:
-		  dwDesiredAccess = FILE_APPEND_DATA;
+      dwDesiredAccess = FILE_APPEND_DATA;
       dwOpenAction = CREATE_NEW;
       break;
     }
 
     m_Handle = CreateFile(filename, dwDesiredAccess, 0, NULL, dwOpenAction, FILE_ATTRIBUTE_NORMAL, NULL);
-	  if(m_Handle == INVALID_HANDLE_VALUE) {
+    if(m_Handle == INVALID_HANDLE_VALUE) {
       ErrorExit(TEXT("CreateFile"));
     }
   }
@@ -145,15 +197,14 @@ FileStream::~FileStream() {
 
 int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
 
-  if(
-     m_Mode == eFileMode_Write ||
+  if(m_Mode == eFileMode_Write ||
      m_Mode == eFileMode_WriteBinary ||
      m_Mode == eFileMode_WriteAppend ||
      m_Mode == eFileMode_WriteBinaryAppend
   ) {
     CHAR errStr[256];
-	_sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename);
-	OutputDebugString(errStr);
+    _sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename);
+    OutputDebugString(errStr);
     return -2;
   }
 
@@ -163,27 +214,27 @@ int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
 
   DWORD oldPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
   if(INVALID_SET_FILE_POINTER == oldPosition) {
-	CHAR errStr[256];
-	_sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError());
-	OutputDebugString(errStr);
-	return -1;
+    CHAR errStr[256];
+    _sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError());
+    OutputDebugString(errStr);
+    return -1;
   }
 
   DWORD amtRead;
   BOOL success = ReadFile(fp, buf, bufSz, &amtRead, NULL);
   if(!success) {
-	CHAR errStr[256];
-	_sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename);
-	OutputDebugString(errStr);
+    CHAR errStr[256];
+    _sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename);
+    OutputDebugString(errStr);
     return -1;
   }
 
   DWORD newPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
   if(INVALID_SET_FILE_POINTER == newPosition) {
-	CHAR errStr[256];
-	_sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError());
-	OutputDebugString(errStr);
-	return -1;
+    CHAR errStr[256];
+    _sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError());
+    OutputDebugString(errStr);
+    return -1;
   }
 
   return newPosition - oldPosition;
@@ -194,9 +245,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
      m_Mode == eFileMode_Read ||
      m_Mode == eFileMode_ReadBinary
   ) {
-	CHAR errStr[256];
-	_sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename);
-	OutputDebugString(errStr);
+    CHAR errStr[256];
+    _sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename);
+    OutputDebugString(errStr);
     return -2;
   }
 
@@ -213,10 +264,10 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
   }
 
   if(INVALID_SET_FILE_POINTER == dwPos) {
-	CHAR errStr[256];
-	_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
-	OutputDebugString(errStr);
-	return -1;
+    CHAR errStr[256];
+    _sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
+    OutputDebugString(errStr);
+    return -1;
   }
 
   while(!LockFile(fp, dwPos, 0, bufSz, 0)) Sleep(1);
@@ -227,9 +278,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
   UnlockFile(fp, dwPos, 0, bufSz, 0);
 
   if(!success) {
-	CHAR errStr[256];
-	_sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename);
-	OutputDebugString(errStr);
+    CHAR errStr[256];
+    _sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename);
+    OutputDebugString(errStr);
     return -1;
   }
 
@@ -244,17 +295,17 @@ int32 FileStream::Tell() {
 
   DWORD pos =  SetFilePointer(fp, 0, NULL, FILE_CURRENT);
   if(INVALID_SET_FILE_POINTER == pos) {
-	CHAR errStr[256];
-	_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
-	OutputDebugString(errStr);
-	return -1;
+    CHAR errStr[256];
+    _sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
+    OutputDebugString(errStr);
+    return -1;
   }
 
   return pos;
 }
 
 bool FileStream::Seek(uint32 offset, ESeekPosition pos) {
-  
+
   // We cannot seek in append mode.
   if(m_Mode == eFileMode_WriteAppend || m_Mode == eFileMode_WriteBinaryAppend)
     return false;
@@ -264,17 +315,17 @@ bool FileStream::Seek(uint32 offset, ESeekPosition pos) {
 
   DWORD origin = FILE_BEGIN;
   switch(pos) {
-  default:
-  case eSeekPosition_Beginning:
-    // Do nothing
+    default:
+    case eSeekPosition_Beginning:
+      // Do nothing
     break;
 
-  case eSeekPosition_Current:
-    origin = FILE_CURRENT;
+    case eSeekPosition_Current:
+      origin = FILE_CURRENT;
     break;
 
-  case eSeekPosition_End:
-    origin = FILE_END;
+    case eSeekPosition_End:
+      origin = FILE_END;
     break;
   }
 
diff --git a/IO/src/ImageFile.cpp b/IO/src/ImageFile.cpp
index 0fb476c..cd1596e 100644
--- a/IO/src/ImageFile.cpp
+++ b/IO/src/ImageFile.cpp
@@ -125,7 +125,7 @@ bool ImageFile::Load() {
     delete m_Image;
     m_Image = NULL;
   }
-	
+  
   unsigned char *rawData = ReadFileData(m_Filename);
   if(rawData) {
     m_Image = LoadImage(rawData);
@@ -272,7 +272,7 @@ unsigned char *ImageFile::ReadFileData(const CHAR *filename) {
 bool ImageFile::WriteImageDataToFile(const uint8 *data,
                                      const uint32 dataSz,
                                      const CHAR *filename) {
-	
+
   // Open a file stream and write out the data...
   FileStream fstr (filename, eFileMode_WriteBinary);
   if(fstr.Tell() < 0) {
diff --git a/IO/src/ImageLoader.cpp b/IO/src/ImageLoader.cpp
index b6dc03f..57886b1 100644
--- a/IO/src/ImageLoader.cpp
+++ b/IO/src/ImageLoader.cpp
@@ -172,54 +172,54 @@ bool ImageLoader::LoadImage() {
 
       // For each block, visit the pixels in sequential order
       for(uint32 y = i; y < i+4; y++) {
-		for(uint32 x = j; x < j+4; x++) {
+        for(uint32 x = j; x < j+4; x++) {
 
-		  if(y >= m_Height || x >= m_Width) {
-			m_PixelData[byteIdx++] = 0; // r
-			m_PixelData[byteIdx++] = 0; // g
-			m_PixelData[byteIdx++] = 0; // b
-			m_PixelData[byteIdx++] = 0; // a
-			continue;
-		  }
+          if(y >= m_Height || x >= m_Width) {
+            m_PixelData[byteIdx++] = 0; // r
+            m_PixelData[byteIdx++] = 0; // g
+            m_PixelData[byteIdx++] = 0; // b
+            m_PixelData[byteIdx++] = 0; // a
+            continue;
+          }
 
-		  unsigned int redVal = GetChannelForPixel(x, y, 0);
-		  if(redVal == INT_MAX)
-			return false;
+          unsigned int redVal = GetChannelForPixel(x, y, 0);
+          if(redVal == INT_MAX)
+            return false;
 
-		  unsigned int greenVal = redVal;
-		  unsigned int blueVal = redVal;
+          unsigned int greenVal = redVal;
+          unsigned int blueVal = redVal;
 
-		  if(GetGreenChannelPrecision() > 0) {
-			greenVal = GetChannelForPixel(x, y, 1);
-			if(greenVal == INT_MAX)
-			  return false;
-		  }
+          if(GetGreenChannelPrecision() > 0) {
+            greenVal = GetChannelForPixel(x, y, 1);
+            if(greenVal == INT_MAX)
+              return false;
+          }
 
-		  if(GetBlueChannelPrecision() > 0) {
-			blueVal = GetChannelForPixel(x, y, 2);
-			if(blueVal == INT_MAX)
-			  return false;
-		  }
+          if(GetBlueChannelPrecision() > 0) {
+            blueVal = GetChannelForPixel(x, y, 2);
+            if(blueVal == INT_MAX)
+              return false;
+          }
 
-		  unsigned int alphaVal = 0xFF;
-		  if(GetAlphaChannelPrecision() > 0) {
-			alphaVal = GetChannelForPixel(x, y, 3);
-			if(alphaVal == INT_MAX)
-			  return false;
-		  }
+          unsigned int alphaVal = 0xFF;
+          if(GetAlphaChannelPrecision() > 0) {
+            alphaVal = GetChannelForPixel(x, y, 3);
+            if(alphaVal == INT_MAX)
+              return false;
+          }
 
-		  // Red channel
-		  m_PixelData[byteIdx++] = redVal & 0xFF;
+          // Red channel
+          m_PixelData[byteIdx++] = redVal & 0xFF;
 
-		  // Green channel
-		  m_PixelData[byteIdx++] = greenVal & 0xFF;
+          // Green channel
+          m_PixelData[byteIdx++] = greenVal & 0xFF;
 
-		  // Blue channel
-		  m_PixelData[byteIdx++] = blueVal & 0xFF;
+          // Blue channel
+          m_PixelData[byteIdx++] = blueVal & 0xFF;
 
-		  // Alpha channel
-		  m_PixelData[byteIdx++] = alphaVal & 0xFF;
-		}
+          // Alpha channel
+          m_PixelData[byteIdx++] = alphaVal & 0xFF;
+        }
       }
     }
   }
diff --git a/IO/src/ImageLoaderPNG.cpp b/IO/src/ImageLoaderPNG.cpp
index 48f4411..04a4ecc 100644
--- a/IO/src/ImageLoaderPNG.cpp
+++ b/IO/src/ImageLoaderPNG.cpp
@@ -54,10 +54,8 @@ static void ReportError(const char *msg) {
 
 class PNGStreamReader {
 public:
-  static void ReadDataFromStream(
-    png_structp png_ptr, 
-    png_bytep outBytes, 
-    png_size_t byteCountToRead
+  static void ReadDataFromStream(png_structp png_ptr, 
+           png_bytep outBytes, png_size_t byteCountToRead
   ) {
     png_voidp io_ptr = png_get_io_ptr( png_ptr );
     if( io_ptr == NULL ) {
@@ -120,9 +118,9 @@ bool ImageLoaderPNG::ReadData() {
   int colorType = -1;
 
   if( 1 != png_get_IHDR(png_ptr, info_ptr, 
-      (png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height), 
-      &bitDepth, &colorType, 
-      NULL, NULL, NULL) 
+    (png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height), 
+    &bitDepth, &colorType, 
+    NULL, NULL, NULL) 
   ) {
     ReportError("Could not read PNG header");
     png_destroy_read_struct(&png_ptr, NULL, NULL);
@@ -140,33 +138,33 @@ bool ImageLoaderPNG::ReadData() {
   png_bytep rowData = new png_byte[bpr];
 
   switch(colorType) {
-  default:
-  case PNG_COLOR_TYPE_PALETTE:
-    ReportError("PNG color type unsupported");
-    png_destroy_read_struct(&png_ptr, NULL, NULL);
-    return false;
+    default:
+    case PNG_COLOR_TYPE_PALETTE:
+      ReportError("PNG color type unsupported");
+      png_destroy_read_struct(&png_ptr, NULL, NULL);
+      return false;
 
-  case PNG_COLOR_TYPE_GRAY: {
+    case PNG_COLOR_TYPE_GRAY: {
       m_RedChannelPrecision = bitDepth;
       m_RedData = new unsigned char[numPixels];
 
       for(uint32 i = 0; i < m_Height; i++) {
-	
-	png_read_row(png_ptr, rowData, NULL);
+  
+        png_read_row(png_ptr, rowData, NULL);
 
-	unsigned int rowOffset = i * m_Width;
-	
-	unsigned int byteIdx = 0;
-	for(uint32 j = 0; j < m_Width; j++) {
- 	  m_RedData[rowOffset + j] = rowData[byteIdx++];
-	}
+        unsigned int rowOffset = i * m_Width;
+  
+        unsigned int byteIdx = 0;
+        for(uint32 j = 0; j < m_Width; j++) {
+          m_RedData[rowOffset + j] = rowData[byteIdx++];
+        }
 
-	assert(byteIdx == bpr);
+        assert(byteIdx == bpr);
       }
     }
     break;
 
-  case PNG_COLOR_TYPE_RGB:
+    case PNG_COLOR_TYPE_RGB:
       m_RedChannelPrecision = bitDepth;
       m_RedData = new unsigned char[numPixels];
       m_GreenChannelPrecision = bitDepth;
@@ -175,23 +173,23 @@ bool ImageLoaderPNG::ReadData() {
       m_BlueData = new unsigned char[numPixels];
 
       for(uint32 i = 0; i < m_Height; i++) {
-	
-	png_read_row(png_ptr, rowData, NULL);
+  
+        png_read_row(png_ptr, rowData, NULL);
 
-	unsigned int rowOffset = i * m_Width;
-	
-	unsigned int byteIdx = 0;
-	for(uint32 j = 0; j < m_Width; j++) {
- 	  m_RedData[rowOffset + j] = rowData[byteIdx++];
-	  m_GreenData[rowOffset + j] = rowData[byteIdx++];
-	  m_BlueData[rowOffset + j] = rowData[byteIdx++];
-	}
+        unsigned int rowOffset = i * m_Width;
+  
+        unsigned int byteIdx = 0;
+        for(uint32 j = 0; j < m_Width; j++) {
+          m_RedData[rowOffset + j] = rowData[byteIdx++];
+          m_GreenData[rowOffset + j] = rowData[byteIdx++];
+          m_BlueData[rowOffset + j] = rowData[byteIdx++];
+        }
 
-	assert(byteIdx == bpr);
+        assert(byteIdx == bpr);
       }
     break;
 
-  case PNG_COLOR_TYPE_RGB_ALPHA:
+    case PNG_COLOR_TYPE_RGB_ALPHA:
       m_RedChannelPrecision = bitDepth;
       m_RedData = new unsigned char[numPixels];
       m_GreenChannelPrecision = bitDepth;
@@ -202,42 +200,42 @@ bool ImageLoaderPNG::ReadData() {
       m_AlphaData = new unsigned char[numPixels];
 
       for(uint32 i = 0; i < m_Height; i++) {
-	
-	png_read_row(png_ptr, rowData, NULL);
+  
+        png_read_row(png_ptr, rowData, NULL);
 
-	unsigned int rowOffset = i * m_Width;
-	
-	unsigned int byteIdx = 0;
-	for(uint32 j = 0; j < m_Width; j++) {
- 	  m_RedData[rowOffset + j] = rowData[byteIdx++];
-	  m_GreenData[rowOffset + j] = rowData[byteIdx++];
-	  m_BlueData[rowOffset + j] = rowData[byteIdx++];
-	  m_AlphaData[rowOffset + j] = rowData[byteIdx++];
-	}
+        unsigned int rowOffset = i * m_Width;
+  
+        unsigned int byteIdx = 0;
+        for(uint32 j = 0; j < m_Width; j++) {
+          m_RedData[rowOffset + j] = rowData[byteIdx++];
+          m_GreenData[rowOffset + j] = rowData[byteIdx++];
+          m_BlueData[rowOffset + j] = rowData[byteIdx++];
+          m_AlphaData[rowOffset + j] = rowData[byteIdx++];
+        }
 
-	assert(byteIdx == bpr);
+        assert(byteIdx == bpr);
       }
     break;
 
-  case PNG_COLOR_TYPE_GRAY_ALPHA:
+    case PNG_COLOR_TYPE_GRAY_ALPHA:
       m_RedChannelPrecision = bitDepth;
       m_RedData = new unsigned char[numPixels];
       m_AlphaChannelPrecision = bitDepth;
       m_AlphaData = new unsigned char[numPixels];
 
       for(uint32 i = 0; i < m_Height; i++) {
-	
-	png_read_row(png_ptr, rowData, NULL);
+  
+        png_read_row(png_ptr, rowData, NULL);
 
-	unsigned int rowOffset = i * m_Width;
-	
-	unsigned int byteIdx = 0;
-	for(uint32 j = 0; j < m_Width; j++) {
- 	  m_RedData[rowOffset + j] = rowData[byteIdx++];
-	  m_AlphaData[rowOffset + j] = rowData[byteIdx++];
-	}
+        unsigned int rowOffset = i * m_Width;
+  
+        unsigned int byteIdx = 0;
+        for(uint32 j = 0; j < m_Width; j++) {
+          m_RedData[rowOffset + j] = rowData[byteIdx++];
+          m_AlphaData[rowOffset + j] = rowData[byteIdx++];
+        }
 
-	assert(byteIdx == bpr);
+        assert(byteIdx == bpr);
       }
     break;
   }
diff --git a/IO/src/ImageWriterPNG.cpp b/IO/src/ImageWriterPNG.cpp
index 8ada946..2419208 100644
--- a/IO/src/ImageWriterPNG.cpp
+++ b/IO/src/ImageWriterPNG.cpp
@@ -66,87 +66,87 @@ public:
 
     ImageWriterPNG &writer = *(ImageWriterPNG *)(io_ptr);
 
-		while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) {
-			uint8 *newData = new uint8[writer.m_RawFileDataSz << 1];
-			memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz);
-			writer.m_RawFileDataSz <<= 1;
-			delete writer.m_RawFileData;
-			writer.m_RawFileData = newData;
-		}
+    while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) {  // double capacity until the write fits
+      uint8 *newData = new uint8[writer.m_RawFileDataSz << 1];
+      memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz);
+      writer.m_RawFileDataSz <<= 1;
+      delete [] writer.m_RawFileData;  // array delete: this buffer is replaced by a new[] allocation each pass
+      writer.m_RawFileData = newData;
+    }
 
-		unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]);
+    unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]);
     memcpy(stream, outBytes, byteCountToWrite);
 
     writer.m_StreamPosition += byteCountToWrite;
   }
 
-	static void FlushStream(png_structp png_ptr) { /* Do nothing... */ }
+  static void FlushStream(png_structp png_ptr) { /* Do nothing... */ }
 
 };
 
 ImageWriterPNG::ImageWriterPNG(const Image &im)
-	: ImageWriter(im.GetWidth(), im.GetHeight(), im.RawData())
+  : ImageWriter(im.GetWidth(), im.GetHeight(), im.RawData())
   , m_StreamPosition(0)
 {
 }
 
 bool ImageWriterPNG::WriteImage() {
 
-	png_structp png_ptr = NULL;
-	png_infop info_ptr = NULL;
-	png_byte ** row_pointers = NULL;
-	int pixel_size = 4;
-	int depth = 8;
+  png_structp png_ptr = NULL;
+  png_infop info_ptr = NULL;
+  png_byte ** row_pointers = NULL;
+  int pixel_size = 4;
+  int depth = 8;
     
-	png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
-	if (png_ptr == NULL) {
-		return false;
-	}
+  png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  if (png_ptr == NULL) {
+    return false;
+  }
     
-	info_ptr = png_create_info_struct (png_ptr);
-	if (info_ptr == NULL) {
-		png_destroy_write_struct (&png_ptr, &info_ptr);
-		return false;
-	}
+  info_ptr = png_create_info_struct (png_ptr);
+  if (info_ptr == NULL) {
+    png_destroy_write_struct (&png_ptr, &info_ptr);
+    return false;
+  }
     
-	/* Set image attributes. */
+  /* Set image attributes. */
 
-	png_set_IHDR (png_ptr,
-								info_ptr,
-								m_Width,
-								m_Height,
-								depth,
-								PNG_COLOR_TYPE_RGBA,
-								PNG_INTERLACE_NONE,
-								PNG_COMPRESSION_TYPE_DEFAULT,
-								PNG_FILTER_TYPE_DEFAULT);
+  png_set_IHDR (png_ptr,
+                info_ptr,
+                m_Width,
+                m_Height,
+                depth,
+                PNG_COLOR_TYPE_RGBA,
+                PNG_INTERLACE_NONE,
+                PNG_COMPRESSION_TYPE_DEFAULT,
+                PNG_FILTER_TYPE_DEFAULT);
     
-	/* Initialize rows of PNG. */
+  /* Initialize rows of PNG. */
 
-	row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *));
-	for (uint32 y = 0; y < m_Height; ++y) {
-		png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size);
+  row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *));
+  for (uint32 y = 0; y < m_Height; ++y) {
+    png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size);
 
-		row_pointers[y] = row;
+    row_pointers[y] = row;
 
-		for (uint32 x = 0; x < m_Width; ++x) {
-			for(uint32 ch = 0; ch < 4; ch++) {
-				*row++ = GetChannelForPixel(x, y, ch);
-			}
+    for (uint32 x = 0; x < m_Width; ++x) {
+      for(uint32 ch = 0; ch < 4; ch++) {
+        *row++ = GetChannelForPixel(x, y, ch);
+      }
     }
-	}
+  }
     
-	png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream);
-	png_set_rows (png_ptr, info_ptr, row_pointers);
-	png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
+  png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream);
+  png_set_rows (png_ptr, info_ptr, row_pointers);
+  png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
 
-	for (uint32 y = 0; y < m_Height; y++) {
-		png_free (png_ptr, row_pointers[y]);
-	}
-	png_free (png_ptr, row_pointers);
+  for (uint32 y = 0; y < m_Height; y++) {
+    png_free (png_ptr, row_pointers[y]);
+  }
+  png_free (png_ptr, row_pointers);
     
-	png_destroy_write_struct (&png_ptr, &info_ptr);
+  png_destroy_write_struct (&png_ptr, &info_ptr);
 
-	m_RawFileDataSz = m_StreamPosition;
-	return true;
+  m_RawFileDataSz = m_StreamPosition;
+  return true;
 }
diff --git a/IO/src/ImageWriterPNG.h b/IO/src/ImageWriterPNG.h
index 90b99e0..6b67059 100644
--- a/IO/src/ImageWriterPNG.h
+++ b/IO/src/ImageWriterPNG.h
@@ -55,8 +55,8 @@ class ImageWriterPNG : public ImageWriter {
 
   virtual bool WriteImage();
  private:
-	uint32 m_StreamPosition;
-	friend class PNGStreamWriter;
+  uint32 m_StreamPosition;
+  friend class PNGStreamWriter;
 };
 
 #endif // _IMAGE_LOADER_H_