diff --git a/BPTCEncoder/src/BC7CompressorSIMD.cpp b/BPTCEncoder/src/BC7CompressorSIMD.cpp index 6d7c30e..a5fe285 100755 --- a/BPTCEncoder/src/BC7CompressorSIMD.cpp +++ b/BPTCEncoder/src/BC7CompressorSIMD.cpp @@ -78,37 +78,37 @@ static const uint32 kNumShapes2 = 64; static const uint16 kShapeMask2[kNumShapes2] = { - 0xcccc, 0x8888, 0xeeee, 0xecc8, 0xc880, 0xfeec, 0xfec8, 0xec80, - 0xc800, 0xffec, 0xfe80, 0xe800, 0xffe8, 0xff00, 0xfff0, 0xf000, - 0xf710, 0x008e, 0x7100, 0x08ce, 0x008c, 0x7310, 0x3100, 0x8cce, - 0x088c, 0x3110, 0x6666, 0x366c, 0x17e8, 0x0ff0, 0x718e, 0x399c, - 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, 0x3c3c, 0x55aa, 0x9696, 0xa55a, - 0x73ce, 0x13c8, 0x324c, 0x3bdc, 0x6996, 0xc33c, 0x9966, 0x0660, - 0x0272, 0x04e4, 0x4e40, 0x2720, 0xc936, 0x936c, 0x39c6, 0x639c, - 0x9336, 0x9cc6, 0x817e, 0xe718, 0xccf0, 0x0fcc, 0x7744, 0xee22 + 0xcccc, 0x8888, 0xeeee, 0xecc8, 0xc880, 0xfeec, 0xfec8, 0xec80, + 0xc800, 0xffec, 0xfe80, 0xe800, 0xffe8, 0xff00, 0xfff0, 0xf000, + 0xf710, 0x008e, 0x7100, 0x08ce, 0x008c, 0x7310, 0x3100, 0x8cce, + 0x088c, 0x3110, 0x6666, 0x366c, 0x17e8, 0x0ff0, 0x718e, 0x399c, + 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, 0x3c3c, 0x55aa, 0x9696, 0xa55a, + 0x73ce, 0x13c8, 0x324c, 0x3bdc, 0x6996, 0xc33c, 0x9966, 0x0660, + 0x0272, 0x04e4, 0x4e40, 0x2720, 0xc936, 0x936c, 0x39c6, 0x639c, + 0x9336, 0x9cc6, 0x817e, 0xe718, 0xccf0, 0x0fcc, 0x7744, 0xee22 }; static const int kAnchorIdx2[kNumShapes2] = { - 15,15,15,15,15,15,15,15, - 15,15,15,15,15,15,15,15, - 15, 2, 8, 2, 2, 8, 8,15, - 2, 8, 2, 2, 8, 8, 2, 2, - 15,15, 6, 8, 2, 8,15,15, - 2, 8, 2, 2, 2,15,15, 6, - 6, 2, 6, 8,15,15, 2, 2, - 15,15,15,15,15, 2, 2, 15 + 15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15, + 15, 2, 8, 2, 2, 8, 8,15, + 2, 8, 2, 2, 8, 8, 2, 2, + 15,15, 6, 8, 2, 8,15,15, + 2, 8, 2, 2, 2,15,15, 6, + 6, 2, 6, 8,15,15, 2, 2, + 15,15,15,15,15, 2, 2, 15 }; static const uint32 kNumShapes3 = 64; static const uint16 kShapeMask3[kNumShapes3][2] = { - { 0xfecc, 0xf600 }, { 0xffc8, 0x7300 }, { 0xff90, 0x3310 }, { 0xecce, 0x00ce }, { 0xff00, 0xcc00 }, { 0xcccc, 0xcc00 }, { 0xffcc, 0x00cc }, { 0xffcc, 0x3300 }, - { 0xff00, 0xf000 }, { 0xfff0, 0xf000 }, { 0xfff0, 0xff00 }, { 0xcccc, 0x8888 }, { 0xeeee, 0x8888 }, { 0xeeee, 0xcccc }, { 0xffec, 0xec80 }, { 0x739c, 0x7310 }, - { 0xfec8, 0xc800 }, { 0x39ce, 0x3100 }, { 0xfff0, 0xccc0 }, { 0xfccc, 0x0ccc }, { 0xeeee, 0xee00 }, { 0xff88, 0x7700 }, { 0xeec0, 0xcc00 }, { 0x7730, 0x3300 }, - { 0x0cee, 0x00cc }, { 0xffcc, 0xfc88 }, { 0x6ff6, 0x0660 }, { 0xff60, 0x6600 }, { 0xcbbc, 0xc88c }, { 0xf966, 0xf900 }, { 0xceec, 0x0cc0 }, { 0xff10, 0x7310 }, - { 0xff80, 0xec80 }, { 0xccce, 0x08ce }, { 0xeccc, 0xec80 }, { 0x6666, 0x4444 }, { 0x0ff0, 0x0f00 }, { 0x6db6, 0x4924 }, { 0x6bd6, 0x4294 }, { 0xcf3c, 0x0c30 }, - { 0xc3fc, 0x03c0 }, { 0xffaa, 0xff00 }, { 0xff00, 0x5500 }, { 0xfcfc, 0xcccc }, { 0xcccc, 0x0c0c }, { 0xf6f6, 0x6666 }, { 0xaffa, 0x0ff0 }, { 0xfff0, 0x5550 }, - { 0xfaaa, 0xf000 }, { 0xeeee, 0x0e0e }, { 0xf8f8, 0x8888 }, { 0xfff0, 0x9990 }, { 0xeeee, 0xe00e }, { 0x8ff8, 0x8888 }, { 0xf666, 0xf000 }, { 0xff00, 0x9900 }, - { 0xff66, 0xff00 }, { 0xcccc, 0xc00c }, { 0xcffc, 0xcccc }, { 0xf000, 0x9000 }, { 0x8888, 0x0808 }, { 0xfefe, 0xeeee }, { 0xfffa, 0xfff0 }, { 0x7bde, 0x7310 } + { 0xfecc, 0xf600 }, { 0xffc8, 0x7300 }, { 0xff90, 0x3310 }, { 0xecce, 0x00ce }, { 0xff00, 0xcc00 }, { 0xcccc, 0xcc00 }, { 0xffcc, 0x00cc }, { 0xffcc, 0x3300 }, + { 0xff00, 0xf000 }, { 0xfff0, 0xf000 }, { 0xfff0, 0xff00 }, { 0xcccc, 0x8888 }, { 0xeeee, 0x8888 }, { 0xeeee, 0xcccc }, { 0xffec, 0xec80 }, { 0x739c, 0x7310 }, + { 0xfec8, 0xc800 }, { 0x39ce, 0x3100 }, { 0xfff0, 0xccc0 }, { 0xfccc, 0x0ccc }, { 0xeeee, 0xee00 }, { 0xff88, 0x7700 }, { 0xeec0, 0xcc00 }, { 0x7730, 0x3300 }, + { 0x0cee, 0x00cc }, { 0xffcc, 0xfc88 }, { 0x6ff6, 0x0660 }, { 0xff60, 0x6600 }, { 0xcbbc, 0xc88c }, { 0xf966, 0xf900 }, { 0xceec, 0x0cc0 }, { 0xff10, 0x7310 }, + { 0xff80, 0xec80 }, { 0xccce, 0x08ce }, { 0xeccc, 0xec80 }, { 0x6666, 0x4444 }, { 0x0ff0, 0x0f00 }, { 0x6db6, 0x4924 }, { 0x6bd6, 0x4294 }, { 0xcf3c, 0x0c30 }, + { 0xc3fc, 0x03c0 }, { 0xffaa, 0xff00 }, { 0xff00, 0x5500 }, { 0xfcfc, 0xcccc }, { 0xcccc, 0x0c0c }, { 0xf6f6, 0x6666 }, { 0xaffa, 0x0ff0 }, { 0xfff0, 0x5550 }, + { 0xfaaa, 0xf000 }, { 0xeeee, 0x0e0e }, { 0xf8f8, 0x8888 }, { 0xfff0, 0x9990 }, { 0xeeee, 0xe00e }, { 0x8ff8, 0x8888 }, { 0xf666, 0xf000 }, { 0xff00, 0x9900 }, + { 0xff66, 0xff00 }, { 0xcccc, 0xc00c }, { 0xcffc, 0xcccc }, { 0xf000, 0x9000 }, { 0x8888, 0x0808 }, { 0xfefe, 0xeeee }, { 0xfffa, 0xfff0 }, { 0x7bde, 0x7310 } }; static const uint32 kWMValues[] = { 0x32b92180, 0x32ba3080, 0x31103200, 0x28103c80, 0x32bb3080, 0x25903600, 0x3530b900, 0x3b32b180, 0x34b5b980 }; @@ -116,83 +116,83 @@ static const uint32 kNumWMVals = sizeof(kWMValues) / sizeof(kWMValues[0]); static uint32 gWMVal = -1; static const int kAnchorIdx3[2][kNumShapes3] = { - { 3, 3,15,15, 8, 3,15,15, - 8, 8, 6, 6, 6, 5, 3, 3, - 3, 3, 8,15, 3, 3, 6,10, - 5, 8, 8, 6, 8, 5,15,15, - 8,15, 3, 5, 6,10, 8,15, + { 3, 3,15,15, 8, 3,15,15, + 8, 8, 6, 6, 6, 5, 3, 3, + 3, 3, 8,15, 3, 3, 6,10, + 5, 8, 8, 6, 8, 5,15,15, + 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, - 3,15, 5, 5, 5, 8, 5,10, - 5,10, 8,13,15,12, 3, 3 }, + 3,15, 5, 5, 5, 8, 5,10, + 5,10, 8,13,15,12, 3, 3 }, - { 15, 8, 8, 3,15,15, 3, 8, + { 15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, - 3,15, 6,10,15,15,10, 8, + 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, - 6,15, 8,15, 3, 6, 6, 8, + 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, - 15,15,15,15, 3,15,15, 8 } + 15,15,15,15, 3,15,15, 8 } }; const uint32 kBC7InterpolationValuesScalar[4][16][2] = { - { {64, 0}, {33, 31}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { {64, 0}, {43, 21}, {21, 43}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { {64, 0}, {55, 9}, {46, 18}, {37, 27}, {27, 37}, {18, 46}, {9, 55}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0 }, - { {64, 0}, {60, 4}, {55, 9}, {51, 13}, {47, 17}, {43, 21}, {38, 26}, {34, 30}, {30, 34}, {26, 38}, {21, 43}, {17, 47}, {13, 51}, {9, 55}, {4, 60}, {0, 64} } + { {64, 0}, {33, 31}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { {64, 0}, {43, 21}, {21, 43}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { {64, 0}, {55, 9}, {46, 18}, {37, 27}, {27, 37}, {18, 46}, {9, 55}, {0, 64}, 0, 0, 0, 0, 0, 0, 0, 0 }, + { {64, 0}, {60, 4}, {55, 9}, {51, 13}, {47, 17}, {43, 21}, {38, 26}, {34, 30}, {30, 34}, {26, 38}, {21, 43}, {17, 47}, {13, 51}, {9, 55}, {4, 60}, {0, 64} } }; static const ALIGN_SSE uint32 kZeroVector[4] = { 0, 0, 0, 0 }; const __m128i kBC7InterpolationValuesSIMD[4][16][2] = { - { - { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, - { _mm_set1_epi32(33), _mm_set1_epi32(31) }, - { *((const __m128i *)kZeroVector), _mm_set1_epi32(64) }, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }, - { - { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, - { _mm_set1_epi32(43), _mm_set1_epi32(21)}, - { _mm_set1_epi32(21), _mm_set1_epi32(43)}, - { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)}, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }, - { - { _mm_set1_epi32(64), *((const __m128i *)kZeroVector) }, - { _mm_set1_epi32(55), _mm_set1_epi32(9) }, - { _mm_set1_epi32(46), _mm_set1_epi32(18)}, - { _mm_set1_epi32(37), _mm_set1_epi32(27)}, - { _mm_set1_epi32(27), _mm_set1_epi32(37)}, - { _mm_set1_epi32(18), _mm_set1_epi32(46)}, - { _mm_set1_epi32(9), _mm_set1_epi32(55)}, - { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)}, - 0, 0, 0, 0, 0, 0, 0, 0 - }, - { - { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, - { _mm_set1_epi32(60), _mm_set1_epi32(4)}, - { _mm_set1_epi32(55), _mm_set1_epi32(9)}, - { _mm_set1_epi32(51), _mm_set1_epi32(13)}, - { _mm_set1_epi32(47), _mm_set1_epi32(17)}, - { _mm_set1_epi32(43), _mm_set1_epi32(21)}, - { _mm_set1_epi32(38), _mm_set1_epi32(26)}, - { _mm_set1_epi32(34), _mm_set1_epi32(30)}, - { _mm_set1_epi32(30), _mm_set1_epi32(34)}, - { _mm_set1_epi32(26), _mm_set1_epi32(38)}, - { _mm_set1_epi32(21), _mm_set1_epi32(43)}, - { _mm_set1_epi32(17), _mm_set1_epi32(47)}, - { _mm_set1_epi32(13), _mm_set1_epi32(51)}, - { _mm_set1_epi32(9), _mm_set1_epi32(55)}, - { _mm_set1_epi32(4), _mm_set1_epi32(60)}, - { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)} - } + { + { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, + { _mm_set1_epi32(33), _mm_set1_epi32(31) }, + { *((const __m128i *)kZeroVector), _mm_set1_epi32(64) }, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + { + { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, + { _mm_set1_epi32(43), _mm_set1_epi32(21)}, + { _mm_set1_epi32(21), _mm_set1_epi32(43)}, + { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)}, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + { + { _mm_set1_epi32(64), *((const __m128i *)kZeroVector) }, + { _mm_set1_epi32(55), _mm_set1_epi32(9) }, + { _mm_set1_epi32(46), _mm_set1_epi32(18)}, + { _mm_set1_epi32(37), _mm_set1_epi32(27)}, + { _mm_set1_epi32(27), _mm_set1_epi32(37)}, + { _mm_set1_epi32(18), _mm_set1_epi32(46)}, + { _mm_set1_epi32(9), _mm_set1_epi32(55)}, + { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)}, + 0, 0, 0, 0, 0, 0, 0, 0 + }, + { + { _mm_set1_epi32(64), *((const __m128i *)kZeroVector)}, + { _mm_set1_epi32(60), _mm_set1_epi32(4)}, + { _mm_set1_epi32(55), _mm_set1_epi32(9)}, + { _mm_set1_epi32(51), _mm_set1_epi32(13)}, + { _mm_set1_epi32(47), _mm_set1_epi32(17)}, + { _mm_set1_epi32(43), _mm_set1_epi32(21)}, + { _mm_set1_epi32(38), _mm_set1_epi32(26)}, + { _mm_set1_epi32(34), _mm_set1_epi32(30)}, + { _mm_set1_epi32(30), _mm_set1_epi32(34)}, + { _mm_set1_epi32(26), _mm_set1_epi32(38)}, + { _mm_set1_epi32(21), _mm_set1_epi32(43)}, + { _mm_set1_epi32(17), _mm_set1_epi32(47)}, + { _mm_set1_epi32(13), _mm_set1_epi32(51)}, + { _mm_set1_epi32(9), _mm_set1_epi32(55)}, + { _mm_set1_epi32(4), _mm_set1_epi32(60)}, + { *((const __m128i *)kZeroVector), _mm_set1_epi32(64)} + } }; static const ALIGN_SSE uint32 kByteValMask[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; static inline __m128i sad(const __m128i &a, const __m128i &b) { - const __m128i maxab = _mm_max_epu8(a, b); - const __m128i minab = _mm_min_epu8(a, b); - return _mm_and_si128( *((const __m128i *)kByteValMask), _mm_subs_epu8( maxab, minab ) ); + const __m128i maxab = _mm_max_epu8(a, b); + const __m128i minab = _mm_min_epu8(a, b); + return _mm_and_si128( *((const __m128i *)kByteValMask), _mm_subs_epu8( maxab, minab ) ); } #include @@ -220,211 +220,211 @@ int BC7CompressionModeSIMD::MaxAnnealingIterations = 50; // This is a setting. int BC7CompressionModeSIMD::NumUses[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; BC7CompressionModeSIMD::Attributes BC7CompressionModeSIMD::kModeAttributes[kNumModes] = { - { 0, 4, 3, 3, 4, 4, 4, 0, BC7CompressionModeSIMD::ePBitType_NotShared }, - { 1, 6, 2, 3, 6, 6, 6, 0, BC7CompressionModeSIMD::ePBitType_Shared }, - { 2, 6, 3, 2, 5, 5, 5, 0, BC7CompressionModeSIMD::ePBitType_None }, - { 3, 6, 2, 2, 7, 7, 7, 0, BC7CompressionModeSIMD::ePBitType_NotShared }, - { 0 }, // Mode 4 not supported - { 0 }, // Mode 5 not supported - { 6, 0, 1, 4, 7, 7, 7, 7, BC7CompressionModeSIMD::ePBitType_NotShared }, - { 7, 6, 2, 2, 5, 5, 5, 5, BC7CompressionModeSIMD::ePBitType_NotShared }, + { 0, 4, 3, 3, 4, 4, 4, 0, BC7CompressionModeSIMD::ePBitType_NotShared }, + { 1, 6, 2, 3, 6, 6, 6, 0, BC7CompressionModeSIMD::ePBitType_Shared }, + { 2, 6, 3, 2, 5, 5, 5, 0, BC7CompressionModeSIMD::ePBitType_None }, + { 3, 6, 2, 2, 7, 7, 7, 0, BC7CompressionModeSIMD::ePBitType_NotShared }, + { 0 }, // Mode 4 not supported + { 0 }, // Mode 5 not supported + { 6, 0, 1, 4, 7, 7, 7, 7, BC7CompressionModeSIMD::ePBitType_NotShared }, + { 7, 6, 2, 2, 5, 5, 5, 5, BC7CompressionModeSIMD::ePBitType_NotShared }, }; void BC7CompressionModeSIMD::ClampEndpointsToGrid(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, int &bestPBitCombo) const { - const int nPbitCombos = GetNumPbitCombos(); - const bool hasPbits = nPbitCombos > 1; - __m128i qmask; - GetQuantizationMask(qmask); + const int nPbitCombos = GetNumPbitCombos(); + const bool hasPbits = nPbitCombos > 1; + __m128i qmask; + GetQuantizationMask(qmask); - ClampEndpoints(p1, p2); + ClampEndpoints(p1, p2); - // !SPEED! This can be faster. We're searching through all possible - // pBit combos to find the best one. Instead, we should be seeing what - // the pBit type is for this compression mode and finding the closest - // quantization. - float minDist = FLT_MAX; - RGBAVectorSIMD bp1, bp2; - for(int i = 0; i < nPbitCombos; i++) { + // !SPEED! This can be faster. We're searching through all possible + // pBit combos to find the best one. Instead, we should be seeing what + // the pBit type is for this compression mode and finding the closest + // quantization. + float minDist = FLT_MAX; + RGBAVectorSIMD bp1, bp2; + for(int i = 0; i < nPbitCombos; i++) { - __m128i qp1, qp2; - if(hasPbits) { - qp1 = p1.ToPixel(qmask, GetPBitCombo(i)[0]); - qp2 = p2.ToPixel(qmask, GetPBitCombo(i)[1]); - } - else { - qp1 = p1.ToPixel(qmask); - qp2 = p2.ToPixel(qmask); - } + __m128i qp1, qp2; + if(hasPbits) { + qp1 = p1.ToPixel(qmask, GetPBitCombo(i)[0]); + qp2 = p2.ToPixel(qmask, GetPBitCombo(i)[1]); + } + else { + qp1 = p1.ToPixel(qmask); + qp2 = p2.ToPixel(qmask); + } - RGBAVectorSIMD np1 = RGBAVectorSIMD( _mm_cvtepi32_ps( qp1 ) ); - RGBAVectorSIMD np2 = RGBAVectorSIMD( _mm_cvtepi32_ps( qp2 ) ); + RGBAVectorSIMD np1 = RGBAVectorSIMD( _mm_cvtepi32_ps( qp1 ) ); + RGBAVectorSIMD np2 = RGBAVectorSIMD( _mm_cvtepi32_ps( qp2 ) ); - RGBAVectorSIMD d1 = np1 - p1; - RGBAVectorSIMD d2 = np2 - p2; - float dist = (d1 * d1) + (d2 * d2); - if(dist < minDist) { - minDist = dist; - bp1 = np1; bp2 = np2; - bestPBitCombo = i; - } - } + RGBAVectorSIMD d1 = np1 - p1; + RGBAVectorSIMD d2 = np2 - p2; + float dist = (d1 * d1) + (d2 * d2); + if(dist < minDist) { + minDist = dist; + bp1 = np1; bp2 = np2; + bestPBitCombo = i; + } + } - p1 = bp1; - p2 = bp2; + p1 = bp1; + p2 = bp2; } int BC7CompressionModeSIMD::GetSubsetForIndex(int idx, const int shapeIdx) const { - int subset = 0; - - const int nSubsets = GetNumberOfSubsets(); - switch(nSubsets) { - case 2: - { - subset = !!((1 << idx) & kShapeMask2[shapeIdx]); - } - break; + int subset = 0; + + const int nSubsets = GetNumberOfSubsets(); + switch(nSubsets) { + case 2: + { + subset = !!((1 << idx) & kShapeMask2[shapeIdx]); + } + break; - case 3: - { - if(1 << idx & kShapeMask3[shapeIdx][0]) - subset = 1 + !!((1 << idx) & kShapeMask3[shapeIdx][1]); - else - subset = 0; - } - break; + case 3: + { + if(1 << idx & kShapeMask3[shapeIdx][0]) + subset = 1 + !!((1 << idx) & kShapeMask3[shapeIdx][1]); + else + subset = 0; + } + break; - default: - break; - } + default: + break; + } - return subset; + return subset; } int BC7CompressionModeSIMD::GetAnchorIndexForSubset(int subset, const int shapeIdx) const { - - const int nSubsets = GetNumberOfSubsets(); - int anchorIdx = 0; + + const int nSubsets = GetNumberOfSubsets(); + int anchorIdx = 0; - switch(subset) { - case 1: - { - if(nSubsets == 2) { - anchorIdx = kAnchorIdx2[shapeIdx]; - } - else { - anchorIdx = kAnchorIdx3[0][shapeIdx]; - } - } - break; + switch(subset) { + case 1: + { + if(nSubsets == 2) { + anchorIdx = kAnchorIdx2[shapeIdx]; + } + else { + anchorIdx = kAnchorIdx3[0][shapeIdx]; + } + } + break; - case 2: - { - assert(nSubsets == 3); - anchorIdx = kAnchorIdx3[1][shapeIdx]; - } - break; + case 2: + { + assert(nSubsets == 3); + anchorIdx = kAnchorIdx3[1][shapeIdx]; + } + break; - default: - break; - } + default: + break; + } - return anchorIdx; + return anchorIdx; } double BC7CompressionModeSIMD::CompressSingleColor(const RGBAVectorSIMD &p, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, int &bestPbitCombo) const { - // Our pixel to compress... - const __m128i pixel = p.ToPixel(*((const __m128i *)kByteValMask)); + // Our pixel to compress... + const __m128i pixel = p.ToPixel(*((const __m128i *)kByteValMask)); - uint32 bestDist = 0xFF; - bestPbitCombo = -1; + uint32 bestDist = 0xFF; + bestPbitCombo = -1; - for(int pbi = 0; pbi < GetNumPbitCombos(); pbi++) { + for(int pbi = 0; pbi < GetNumPbitCombos(); pbi++) { - const int *pbitCombo = GetPBitCombo(pbi); - - uint32 dist = 0x0; - uint32 bestValI[kNumColorChannels] = { -1, -1, -1, -1 }; - uint32 bestValJ[kNumColorChannels] = { -1, -1, -1, -1 }; + const int *pbitCombo = GetPBitCombo(pbi); + + uint32 dist = 0x0; + uint32 bestValI[kNumColorChannels] = { -1, -1, -1, -1 }; + uint32 bestValJ[kNumColorChannels] = { -1, -1, -1, -1 }; - for(int ci = 0; ci < kNumColorChannels; ci++) { + for(int ci = 0; ci < kNumColorChannels; ci++) { - const uint8 val = ((uint8 *)(&pixel))[4*ci]; - int nBits = 0; - switch(ci) { - case 0: nBits = GetRedChannelPrecision(); break; - case 1: nBits = GetGreenChannelPrecision(); break; - case 2: nBits = GetBlueChannelPrecision(); break; - case 3: nBits = GetAlphaChannelPrecision(); break; - } + const uint8 val = ((uint8 *)(&pixel))[4*ci]; + int nBits = 0; + switch(ci) { + case 0: nBits = GetRedChannelPrecision(); break; + case 1: nBits = GetGreenChannelPrecision(); break; + case 2: nBits = GetBlueChannelPrecision(); break; + case 3: nBits = GetAlphaChannelPrecision(); break; + } - // If we don't handle this channel, then we don't need to - // worry about how well we interpolate. - if(nBits == 0) { bestValI[ci] = bestValJ[ci] = 0xFF; continue; } + // If we don't handle this channel, then we don't need to + // worry about how well we interpolate. + if(nBits == 0) { bestValI[ci] = bestValJ[ci] = 0xFF; continue; } - const int nPossVals = (1 << nBits); - int possValsH[256]; - int possValsL[256]; + const int nPossVals = (1 << nBits); + int possValsH[256]; + int possValsL[256]; - // Do we have a pbit? - const bool havepbit = GetPBitType() != ePBitType_None; - if(havepbit) - nBits++; + // Do we have a pbit? + const bool havepbit = GetPBitType() != ePBitType_None; + if(havepbit) + nBits++; - for(int i = 0; i < nPossVals; i++) { + for(int i = 0; i < nPossVals; i++) { - int vh = i, vl = i; - if(havepbit) { - vh <<= 1; - vl <<= 1; + int vh = i, vl = i; + if(havepbit) { + vh <<= 1; + vl <<= 1; - vh |= pbitCombo[1]; - vl |= pbitCombo[0]; - } + vh |= pbitCombo[1]; + vl |= pbitCombo[0]; + } - possValsH[i] = (vh << (8 - nBits)); - possValsH[i] |= (possValsH[i] >> nBits); + possValsH[i] = (vh << (8 - nBits)); + possValsH[i] |= (possValsH[i] >> nBits); - possValsL[i] = (vl << (8 - nBits)); - possValsL[i] |= (possValsL[i] >> nBits); - } + possValsL[i] = (vl << (8 - nBits)); + possValsL[i] |= (possValsL[i] >> nBits); + } - const uint32 interpVal0 = kBC7InterpolationValuesScalar[GetNumberOfBitsPerIndex() - 1][1][0]; - const uint32 interpVal1 = kBC7InterpolationValuesScalar[GetNumberOfBitsPerIndex() - 1][1][1]; + const uint32 interpVal0 = kBC7InterpolationValuesScalar[GetNumberOfBitsPerIndex() - 1][1][0]; + const uint32 interpVal1 = kBC7InterpolationValuesScalar[GetNumberOfBitsPerIndex() - 1][1][1]; - // Find the closest interpolated val that to the given val... - uint32 bestChannelDist = 0xFF; - for(int i = 0; bestChannelDist > 0 && i < nPossVals; i++) - for(int j = 0; bestChannelDist > 0 && j < nPossVals; j++) { + // Find the closest interpolated val that to the given val... + uint32 bestChannelDist = 0xFF; + for(int i = 0; bestChannelDist > 0 && i < nPossVals; i++) + for(int j = 0; bestChannelDist > 0 && j < nPossVals; j++) { - const uint32 v1 = possValsL[i]; - const uint32 v2 = possValsH[j]; + const uint32 v1 = possValsL[i]; + const uint32 v2 = possValsH[j]; - const uint32 combo = (interpVal0*v1 + (interpVal1 * v2) + 32) >> 6; - const uint32 err = (combo > val)? combo - val : val - combo; + const uint32 combo = (interpVal0*v1 + (interpVal1 * v2) + 32) >> 6; + const uint32 err = (combo > val)? combo - val : val - combo; - if(err < bestChannelDist) { - bestChannelDist = err; - bestValI[ci] = v1; - bestValJ[ci] = v2; - } - } + if(err < bestChannelDist) { + bestChannelDist = err; + bestValI[ci] = v1; + bestValJ[ci] = v2; + } + } - dist = max(bestChannelDist, dist); - } + dist = max(bestChannelDist, dist); + } - if(dist < bestDist) { - bestDist = dist; - bestPbitCombo = pbi; + if(dist < bestDist) { + bestDist = dist; + bestPbitCombo = pbi; - for(int ci = 0; ci < kNumColorChannels; ci++) { - p1.c[ci] = float(bestValI[ci]); - p2.c[ci] = float(bestValJ[ci]); - } - } - } + for(int ci = 0; ci < kNumColorChannels; ci++) { + p1.c[ci] = float(bestValI[ci]); + p2.c[ci] = float(bestValJ[ci]); + } + } + } - return bestDist; + return bestDist; } static const ALIGN_SSE uint32 kOneVec[4] = { 1, 1, 1, 1 }; @@ -433,26 +433,26 @@ static const ALIGN_SSE uint32 kOneVec[4] = { 1, 1, 1, 1 }; // http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/ static uint32 g_seed = uint32(time(NULL)); static inline uint32 fastrand() { - g_seed = (214013 * g_seed + 2531011); - return (g_seed>>16) & RAND_MAX; + g_seed = (214013 * g_seed + 2531011); + return (g_seed>>16) & RAND_MAX; } -static __m128i cur_seed = _mm_set1_epi32( int(time(NULL)) ); +static __m128i cur_seed = _mm_set1_epi32( int(time(NULL)) ); static inline __m128i rand_dir() { - // static const __m128i mult = _mm_set_epi32( 214013, 17405, 214013, 69069 ); - // static const __m128i gadd = _mm_set_epi32( 2531011, 10395331, 13737667, 1 ); - static const ALIGN_SSE uint32 mult[4] = { 214013, 17405, 214013, 0 }; - static const ALIGN_SSE uint32 gadd[4] = { 2531011, 10395331, 13737667, 0 }; - static const ALIGN_SSE uint32 masklo[4] = { RAND_MAX, RAND_MAX, RAND_MAX, RAND_MAX }; - - cur_seed = _mm_mullo_epi32( *((const __m128i *)mult), cur_seed ); - cur_seed = _mm_add_epi32( *((const __m128i *)gadd), cur_seed ); + // static const __m128i mult = _mm_set_epi32( 214013, 17405, 214013, 69069 ); + // static const __m128i gadd = _mm_set_epi32( 2531011, 10395331, 13737667, 1 ); + static const ALIGN_SSE uint32 mult[4] = { 214013, 17405, 214013, 0 }; + static const ALIGN_SSE uint32 gadd[4] = { 2531011, 10395331, 13737667, 0 }; + static const ALIGN_SSE uint32 masklo[4] = { RAND_MAX, RAND_MAX, RAND_MAX, RAND_MAX }; + + cur_seed = _mm_mullo_epi32( *((const __m128i *)mult), cur_seed ); + cur_seed = _mm_add_epi32( *((const __m128i *)gadd), cur_seed ); - const __m128i resShift = _mm_srai_epi32( cur_seed, 16 ); - const __m128i result = _mm_and_si128( resShift, *((const __m128i *)kOneVec) ); + const __m128i resShift = _mm_srai_epi32( cur_seed, 16 ); + const __m128i result = _mm_and_si128( resShift, *((const __m128i *)kOneVec) ); - return result; + return result; } // Fast generation of floats between 0 and 1. It generates a float @@ -463,18 +463,18 @@ static inline __m128i rand_dir() #define COMPILE_ASSERT(x) extern int __compile_assert_[(int)(x)]; COMPILE_ASSERT(RAND_MAX == 0x7FFF) - + static inline float frand() { - const uint16 r = fastrand(); - - // RAND_MAX is 0x7FFF, which offers 15 bits - // of precision. Therefore, we move the bits - // into the top of the 23 bit mantissa, and - // repeat the most significant bits of r in - // the least significant of the mantissa - const uint32 m = (r << 8) | (r >> 7); - const uint32 flt = (127 << 23) | m; - return *(reinterpret_cast(&flt)) - 1.0f; + const uint16 r = fastrand(); + + // RAND_MAX is 0x7FFF, which offers 15 bits + // of precision. Therefore, we move the bits + // into the top of the 23 bit mantissa, and + // repeat the most significant bits of r in + // the least significant of the mantissa + const uint32 m = (r << 8) | (r >> 7); + const uint32 flt = (127 << 23) | m; + return *(reinterpret_cast(&flt)) - 1.0f; } static const ALIGN_SSE uint32 kSevenVec[4] = { 7, 7, 7, 7 }; @@ -482,827 +482,824 @@ static const ALIGN_SSE uint32 kNegOneVec[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF static const ALIGN_SSE uint32 kFloatSignBit[4] = { 0x40000000, 0x40000000, 0x40000000, 0x40000000 }; static void ChangePointForDirWithoutPbitChange(RGBAVectorSIMD &v, const __m128 &stepVec) { - - const __m128i dirBool = rand_dir(); - const __m128i cmp = _mm_cmpeq_epi32( dirBool, *((const __m128i *)kZeroVector) ); + + const __m128i dirBool = rand_dir(); + const __m128i cmp = _mm_cmpeq_epi32( dirBool, *((const __m128i *)kZeroVector) ); - const __m128 negStepVec = _mm_sub_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec ); - const __m128 step = _mm_blendv_ps( negStepVec, stepVec, _mm_castsi128_ps( cmp ) ); - v.vec = _mm_add_ps( v.vec, step ); + const __m128 negStepVec = _mm_sub_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec ); + const __m128 step = _mm_blendv_ps( negStepVec, stepVec, _mm_castsi128_ps( cmp ) ); + v.vec = _mm_add_ps( v.vec, step ); } static void ChangePointForDirWithPbitChange(RGBAVectorSIMD &v, int oldPbit, const __m128 &stepVec) { - const __m128i pBitVec = _mm_set1_epi32( oldPbit ); - const __m128i cmpPBit = _mm_cmpeq_epi32( pBitVec, *((const __m128i *)kZeroVector) ); - const __m128i notCmpPBit = _mm_xor_si128( cmpPBit, *((const __m128i *)kNegOneVec) ); + const __m128i pBitVec = _mm_set1_epi32( oldPbit ); + const __m128i cmpPBit = _mm_cmpeq_epi32( pBitVec, *((const __m128i *)kZeroVector) ); + const __m128i notCmpPBit = _mm_xor_si128( cmpPBit, *((const __m128i *)kNegOneVec) ); - const __m128i dirBool = rand_dir(); - const __m128i cmpDir = _mm_cmpeq_epi32( dirBool, *((const __m128i *)kOneVec) ); - const __m128i notCmpDir = _mm_xor_si128( cmpDir, *((const __m128i *)kNegOneVec) ); - - const __m128i shouldDec = _mm_and_si128( cmpDir, cmpPBit ); - const __m128i shouldInc = _mm_and_si128( notCmpDir, notCmpPBit ); + const __m128i dirBool = rand_dir(); + const __m128i cmpDir = _mm_cmpeq_epi32( dirBool, *((const __m128i *)kOneVec) ); + const __m128i notCmpDir = _mm_xor_si128( cmpDir, *((const __m128i *)kNegOneVec) ); + + const __m128i shouldDec = _mm_and_si128( cmpDir, cmpPBit ); + const __m128i shouldInc = _mm_and_si128( notCmpDir, notCmpPBit ); - const __m128 decStep = _mm_blendv_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec, _mm_castsi128_ps( shouldDec ) ); - v.vec = _mm_sub_ps( v.vec, decStep ); + const __m128 decStep = _mm_blendv_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec, _mm_castsi128_ps( shouldDec ) ); + v.vec = _mm_sub_ps( v.vec, decStep ); - const __m128 incStep = _mm_blendv_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec, _mm_castsi128_ps( shouldInc ) ); - v.vec = _mm_add_ps( v.vec, incStep ); + const __m128 incStep = _mm_blendv_ps( _mm_castsi128_ps( *((const __m128i *)kZeroVector) ), stepVec, _mm_castsi128_ps( shouldInc ) ); + v.vec = _mm_add_ps( v.vec, incStep ); } void BC7CompressionModeSIMD::PickBestNeighboringEndpoints(const RGBAClusterSIMD &cluster, const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const int curPbitCombo, RGBAVectorSIMD &np1, RGBAVectorSIMD &np2, int &nPbitCombo, const __m128 &stepVec) const { - np1 = p1; - np2 = p2; + np1 = p1; + np2 = p2; - // First, let's figure out the new pbit combo... if there's no pbit then we don't need - // to worry about it. - const EPBitType pBitType = GetPBitType(); - if(pBitType != ePBitType_None) { + // First, let's figure out the new pbit combo... if there's no pbit then we don't need + // to worry about it. + const EPBitType pBitType = GetPBitType(); + if(pBitType != ePBitType_None) { - // If there is a pbit, then we must change it, because those will provide the closest values - // to the current point. - if(pBitType == ePBitType_Shared) - nPbitCombo = (curPbitCombo + 1) % 2; - else { - // Not shared... p1 needs to change and p2 needs to change... which means that - // combo 0 gets rotated to combo 3, combo 1 gets rotated to combo 2 and vice - // versa... - nPbitCombo = 3 - curPbitCombo; - } + // If there is a pbit, then we must change it, because those will provide the closest values + // to the current point. + if(pBitType == ePBitType_Shared) + nPbitCombo = (curPbitCombo + 1) % 2; + else { + // Not shared... p1 needs to change and p2 needs to change... which means that + // combo 0 gets rotated to combo 3, combo 1 gets rotated to combo 2 and vice + // versa... + nPbitCombo = 3 - curPbitCombo; + } - assert(GetPBitCombo(curPbitCombo)[0] + GetPBitCombo(nPbitCombo)[0] == 1); - assert(GetPBitCombo(curPbitCombo)[1] + GetPBitCombo(nPbitCombo)[1] == 1); + assert(GetPBitCombo(curPbitCombo)[0] + GetPBitCombo(nPbitCombo)[0] == 1); + assert(GetPBitCombo(curPbitCombo)[1] + GetPBitCombo(nPbitCombo)[1] == 1); - const int *pBitCombo = GetPBitCombo(curPbitCombo); - ChangePointForDirWithPbitChange(np1, pBitCombo[0], stepVec); - ChangePointForDirWithPbitChange(np2, pBitCombo[1], stepVec); - } - else { - ChangePointForDirWithoutPbitChange(np1, stepVec); - ChangePointForDirWithoutPbitChange(np2, stepVec); - } + const int *pBitCombo = GetPBitCombo(curPbitCombo); + ChangePointForDirWithPbitChange(np1, pBitCombo[0], stepVec); + ChangePointForDirWithPbitChange(np2, pBitCombo[1], stepVec); + } + else { + ChangePointForDirWithoutPbitChange(np1, stepVec); + ChangePointForDirWithoutPbitChange(np2, stepVec); + } - ClampEndpoints(np1, np2); + ClampEndpoints(np1, np2); } bool BC7CompressionModeSIMD::AcceptNewEndpointError(float newError, float oldError, float temp) const { - const float p = exp((0.15f * (oldError - newError)) / temp); - // const double r = (double(rand()) / double(RAND_MAX)); - const float r = frand(); + const float p = exp((0.15f * (oldError - newError)) / temp); + // const double r = (double(rand()) / double(RAND_MAX)); + const float r = frand(); - return r < p; + return r < p; } double BC7CompressionModeSIMD::OptimizeEndpointsForCluster(const RGBAClusterSIMD &cluster, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, __m128i *bestIndices, int &bestPbitCombo) const { - - const int nBuckets = (1 << GetNumberOfBitsPerIndex()); - const int nPbitCombos = GetNumPbitCombos(); - __m128i qmask; - GetQuantizationMask(qmask); + + const int nBuckets = (1 << GetNumberOfBitsPerIndex()); + const int nPbitCombos = GetNumPbitCombos(); + __m128i qmask; + GetQuantizationMask(qmask); - // Here we use simulated annealing to traverse the space of clusters to find the best possible endpoints. - float curError = cluster.QuantizedError(p1, p2, nBuckets, qmask, GetPBitCombo(bestPbitCombo), bestIndices); - int curPbitCombo = bestPbitCombo; - float bestError = curError; - RGBAVectorSIMD bp1 = p1, bp2 = p2; + // Here we use simulated annealing to traverse the space of clusters to find the best possible endpoints. + float curError = cluster.QuantizedError(p1, p2, nBuckets, qmask, GetPBitCombo(bestPbitCombo), bestIndices); + int curPbitCombo = bestPbitCombo; + float bestError = curError; + RGBAVectorSIMD bp1 = p1, bp2 = p2; - assert(curError == cluster.QuantizedError(p1, p2, nBuckets, qmask, GetPBitCombo(bestPbitCombo))); + assert(curError == cluster.QuantizedError(p1, p2, nBuckets, qmask, GetPBitCombo(bestPbitCombo))); - __m128i precVec = _mm_setr_epi32( GetRedChannelPrecision(), GetGreenChannelPrecision(), GetBlueChannelPrecision(), GetAlphaChannelPrecision() ); - const __m128i precMask = _mm_xor_si128( _mm_cmpeq_epi32( precVec, *((const __m128i *)kZeroVector) ), *((const __m128i *)kNegOneVec) ); - precVec = _mm_sub_epi32( *((const __m128i *)kSevenVec), precVec ); - precVec = _mm_slli_epi32( precVec, 23 ); - precVec = _mm_or_si128( precVec, *((const __m128i *)kFloatSignBit) ); - - //__m128 stepSzVec = _mm_set1_ps(1.0f); - //__m128 stepVec = _mm_mul_ps( stepSzVec, _mm_castsi128_ps( _mm_and_si128( precMask, precVec ) ) ); - __m128 stepVec = _mm_castsi128_ps( _mm_and_si128( precMask, precVec ) ); + __m128i precVec = _mm_setr_epi32( GetRedChannelPrecision(), GetGreenChannelPrecision(), GetBlueChannelPrecision(), GetAlphaChannelPrecision() ); + const __m128i precMask = _mm_xor_si128( _mm_cmpeq_epi32( precVec, *((const __m128i *)kZeroVector) ), *((const __m128i *)kNegOneVec) ); + precVec = _mm_sub_epi32( *((const __m128i *)kSevenVec), precVec ); + precVec = _mm_slli_epi32( precVec, 23 ); + precVec = _mm_or_si128( precVec, *((const __m128i *)kFloatSignBit) ); + + //__m128 stepSzVec = _mm_set1_ps(1.0f); + //__m128 stepVec = _mm_mul_ps( stepSzVec, _mm_castsi128_ps( _mm_and_si128( precMask, precVec ) ) ); + __m128 stepVec = _mm_castsi128_ps( _mm_and_si128( precMask, precVec ) ); - const int maxEnergy = MaxAnnealingIterations; - for(int energy = 0; bestError > 0 && energy < maxEnergy; energy++) { + const int maxEnergy = MaxAnnealingIterations; + for(int energy = 0; bestError > 0 && energy < maxEnergy; energy++) { - float temp = float(energy) / float(maxEnergy-1); + float temp = float(energy) / float(maxEnergy-1); - __m128i indices[kMaxNumDataPoints/4]; - RGBAVectorSIMD np1, np2; - int nPbitCombo; + __m128i indices[kMaxNumDataPoints/4]; + RGBAVectorSIMD np1, np2; + int nPbitCombo; - PickBestNeighboringEndpoints(cluster, p1, p2, curPbitCombo, np1, np2, nPbitCombo, stepVec); + PickBestNeighboringEndpoints(cluster, p1, p2, curPbitCombo, np1, np2, nPbitCombo, stepVec); - float error = cluster.QuantizedError(np1, np2, nBuckets, qmask, GetPBitCombo(nPbitCombo), indices); - if(AcceptNewEndpointError(error, curError, temp)) { - curError = error; - p1 = np1; - p2 = np2; - curPbitCombo = nPbitCombo; - } + float error = cluster.QuantizedError(np1, np2, nBuckets, qmask, GetPBitCombo(nPbitCombo), indices); + if(AcceptNewEndpointError(error, curError, temp)) { + curError = error; + p1 = np1; + p2 = np2; + curPbitCombo = nPbitCombo; + } - if(error < bestError) { - memcpy(bestIndices, indices, sizeof(indices)); - bp1 = np1; - bp2 = np2; - bestPbitCombo = nPbitCombo; - bestError = error; + if(error < bestError) { + memcpy(bestIndices, indices, sizeof(indices)); + bp1 = np1; + bp2 = np2; + bestPbitCombo = nPbitCombo; + bestError = error; - // Restart... - energy = 0; - } - } + // Restart... + energy = 0; + } + } - p1 = bp1; - p2 = bp2; + p1 = bp1; + p2 = bp2; - return bestError; + return bestError; } double BC7CompressionModeSIMD::CompressCluster(const RGBAClusterSIMD &cluster, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, __m128i *bestIndices, int &bestPbitCombo) const { - - // If all the points are the same in the cluster, then we need to figure out what the best - // approximation to this point is.... - if(cluster.AllSamePoint()) { - const RGBAVectorSIMD &p = cluster.GetPoint(0); - double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo); + + // If all the points are the same in the cluster, then we need to figure out what the best + // approximation to this point is.... + if(cluster.AllSamePoint()) { + const RGBAVectorSIMD &p = cluster.GetPoint(0); + double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo); - // We're assuming all indices will be index 1... - for(int i = 0; i < 4; i++) { - bestIndices[i] = _mm_set1_epi32(1); - } - - return bestErr; - } - - const int nBuckets = (1 << GetNumberOfBitsPerIndex()); - const int nPbitCombos = GetNumPbitCombos(); + // We're assuming all indices will be index 1... + for(int i = 0; i < 4; i++) { + bestIndices[i] = _mm_set1_epi32(1); + } + + return bestErr; + } + + const int nBuckets = (1 << GetNumberOfBitsPerIndex()); + const int nPbitCombos = GetNumPbitCombos(); - RGBAVectorSIMD avg = cluster.GetTotal() / float(cluster.GetNumPoints()); - RGBADirSIMD axis; - ::GetPrincipalAxis(cluster, axis); + RGBAVectorSIMD avg = cluster.GetTotal() / float(cluster.GetNumPoints()); + RGBADirSIMD axis; + ::GetPrincipalAxis(cluster, axis); - float mindp = FLT_MAX, maxdp = -FLT_MAX; - for(int i = 0 ; i < cluster.GetNumPoints(); i++) { - float dp = (cluster.GetPoint(i) - avg) * axis; - if(dp < mindp) mindp = dp; - if(dp > maxdp) maxdp = dp; - } + float mindp = FLT_MAX, maxdp = -FLT_MAX; + for(int i = 0 ; i < cluster.GetNumPoints(); i++) { + float dp = (cluster.GetPoint(i) - avg) * axis; + if(dp < mindp) mindp = dp; + if(dp > maxdp) maxdp = dp; + } - RGBAVectorSIMD pts[1 << 4]; // At most 4 bits per index. - float numPts[1<<4]; - assert(nBuckets <= 1 << 4); - - p1 = avg + mindp * axis; - p2 = avg + maxdp * axis; + RGBAVectorSIMD pts[1 << 4]; // At most 4 bits per index. + float numPts[1<<4]; + assert(nBuckets <= 1 << 4); + + p1 = avg + mindp * axis; + p2 = avg + maxdp * axis; - ClampEndpoints(p1, p2); + ClampEndpoints(p1, p2); - for(int i = 0; i < nBuckets; i++) { - float s = (float(i) / float(nBuckets - 1)); - pts[i] = (1.0f - s) * p1 + s * p2; - } + for(int i = 0; i < nBuckets; i++) { + float s = (float(i) / float(nBuckets - 1)); + pts[i] = (1.0f - s) * p1 + s * p2; + } - assert(pts[0] == p1); - assert(pts[nBuckets - 1] == p2); + assert(pts[0] == p1); + assert(pts[nBuckets - 1] == p2); - // Do k-means clustering... - int bucketIdx[kMaxNumDataPoints]; + // Do k-means clustering... + int bucketIdx[kMaxNumDataPoints]; - bool fixed = false; - while(!fixed) { - - RGBAVectorSIMD newPts[1 << 4]; + bool fixed = false; + while(!fixed) { + + RGBAVectorSIMD newPts[1 << 4]; - // Assign each of the existing points to one of the buckets... - for(int i = 0; i < cluster.GetNumPoints(); i++) { + // Assign each of the existing points to one of the buckets... + for(int i = 0; i < cluster.GetNumPoints(); i++) { - int minBucket = -1; - float minDist = FLT_MAX; - for(int j = 0; j < nBuckets; j++) { - RGBAVectorSIMD v = cluster.GetPoint(i) - pts[j]; - float distSq = v * v; - if(distSq < minDist) - { - minDist = distSq; - minBucket = j; - } - } + int minBucket = -1; + float minDist = FLT_MAX; + for(int j = 0; j < nBuckets; j++) { + RGBAVectorSIMD v = cluster.GetPoint(i) - pts[j]; + float distSq = v * v; + if(distSq < minDist) + { + minDist = distSq; + minBucket = j; + } + } - assert(minBucket >= 0); - bucketIdx[i] = minBucket; - } + assert(minBucket >= 0); + bucketIdx[i] = minBucket; + } - // Calculate new buckets based on centroids of clusters... - for(int i = 0; i < nBuckets; i++) { - - numPts[i] = 0.0f; - newPts[i] = RGBAVectorSIMD(0.0f); - for(int j = 0; j < cluster.GetNumPoints(); j++) { - if(bucketIdx[j] == i) { - numPts[i] += 1.0f; - newPts[i] += cluster.GetPoint(j); - } - } + // Calculate new buckets based on centroids of clusters... + for(int i = 0; i < nBuckets; i++) { + + numPts[i] = 0.0f; + newPts[i] = RGBAVectorSIMD(0.0f); + for(int j = 0; j < cluster.GetNumPoints(); j++) { + if(bucketIdx[j] == i) { + numPts[i] += 1.0f; + newPts[i] += cluster.GetPoint(j); + } + } - // If there are no points in this cluster, then it should - // remain the same as last time and avoid a divide by zero. - if(0.0f != numPts[i]) - newPts[i] /= numPts[i]; - } + // If there are no points in this cluster, then it should + // remain the same as last time and avoid a divide by zero. + if(0.0f != numPts[i]) + newPts[i] /= numPts[i]; + } - // If we haven't changed, then we're done. - fixed = true; - for(int i = 0; i < nBuckets; i++) { - if(pts[i] != newPts[i]) - fixed = false; - } + // If we haven't changed, then we're done. + fixed = true; + for(int i = 0; i < nBuckets; i++) { + if(pts[i] != newPts[i]) + fixed = false; + } - // Assign the new points to be the old points. - for(int i = 0; i < nBuckets; i++) { - pts[i] = newPts[i]; - } - } + // Assign the new points to be the old points. + for(int i = 0; i < nBuckets; i++) { + pts[i] = newPts[i]; + } + } - // If there's only one bucket filled, then just compress for that single color... - int numBucketsFilled = 0, lastFilledBucket = -1; - for(int i = 0; i < nBuckets; i++) { - if(numPts[i] > 0.0f) { - numBucketsFilled++; - lastFilledBucket = i; - } - } + // If there's only one bucket filled, then just compress for that single color... + int numBucketsFilled = 0, lastFilledBucket = -1; + for(int i = 0; i < nBuckets; i++) { + if(numPts[i] > 0.0f) { + numBucketsFilled++; + lastFilledBucket = i; + } + } - assert(numBucketsFilled > 0); - if(1 == numBucketsFilled) { - const RGBAVectorSIMD &p = pts[lastFilledBucket]; - double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo); + assert(numBucketsFilled > 0); + if(1 == numBucketsFilled) { + const RGBAVectorSIMD &p = pts[lastFilledBucket]; + double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo); - // We're assuming all indices will be index 1... - for(int i = 0; i < 4; i++) { - bestIndices[i] = _mm_set1_epi32(1); - } - - return bestErr; - } + // We're assuming all indices will be index 1... + for(int i = 0; i < 4; i++) { + bestIndices[i] = _mm_set1_epi32(1); + } + + return bestErr; + } - // Now that we know the index of each pixel, we can assign the endpoints based on a least squares fit - // of the clusters. For more information, take a look at this article by NVidia: - // http://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/dxtc/doc/cuda_dxtc.pdf - float asq = 0.0, bsq = 0.0, ab = 0.0; - RGBAVectorSIMD ax(0.0f), bx(0.0f); - for(int i = 0; i < nBuckets; i++) { - float a = float(nBuckets - 1 - i) / float(nBuckets - 1); - float b = float(i) / float(nBuckets - 1); + // Now that we know the index of each pixel, we can assign the endpoints based on a least squares fit + // of the clusters. For more information, take a look at this article by NVidia: + // http://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/dxtc/doc/cuda_dxtc.pdf + float asq = 0.0, bsq = 0.0, ab = 0.0; + RGBAVectorSIMD ax(0.0f), bx(0.0f); + for(int i = 0; i < nBuckets; i++) { + float a = float(nBuckets - 1 - i) / float(nBuckets - 1); + float b = float(i) / float(nBuckets - 1); - float n = numPts[i]; - RGBAVectorSIMD x = pts[i]; + float n = numPts[i]; + RGBAVectorSIMD x = pts[i]; - asq += n * a * a; - bsq += n * b * b; - ab += n * a * b; + asq += n * a * a; + bsq += n * b * b; + ab += n * a * b; - ax += x * a * n; - bx += x * b * n; - } + ax += x * a * n; + bx += x * b * n; + } - float f = 1.0f / (asq * bsq - ab * ab); - p1 = f * (ax * bsq - bx * ab); - p2 = f * (bx * asq - ax * ab); + float f = 1.0f / (asq * bsq - ab * ab); + p1 = f * (ax * bsq - bx * ab); + p2 = f * (bx * asq - ax * ab); - ClampEndpointsToGrid(p1, p2, bestPbitCombo); - - #ifdef _DEBUG - int pBitCombo = bestPbitCombo; - RGBAVectorSIMD tp1 = p1, tp2 = p2; - ClampEndpointsToGrid(tp1, tp2, pBitCombo); - - assert(p1 == tp1); - assert(p2 == tp2); - assert(pBitCombo == bestPbitCombo); - #endif - - assert(bestPbitCombo >= 0); - - return OptimizeEndpointsForCluster(cluster, p1, p2, bestIndices, bestPbitCombo); -} - -double BC7CompressionModeSIMD::Compress(BitStream &stream, const int shapeIdx, const RGBAClusterSIMD *clusters) const { - - const int kModeNumber = GetModeNumber(); - const int nPartitionBits = GetNumberOfPartitionBits(); - const int nSubsets = GetNumberOfSubsets(); - - // Mode # - stream.WriteBits(1 << kModeNumber, kModeNumber + 1); - - // Partition # - assert((((1 << nPartitionBits) - 1) & shapeIdx) == shapeIdx); - stream.WriteBits(shapeIdx, nPartitionBits); - - RGBAVectorSIMD p1[kMaxNumSubsets], p2[kMaxNumSubsets]; - int bestIndices[kMaxNumSubsets][kMaxNumDataPoints] = { - { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, - { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, - { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } - }; - int bestPbitCombo[kMaxNumSubsets] = { -1, -1, -1 }; - - double totalErr = 0.0; - for(int cidx = 0; cidx < nSubsets; cidx++) { - ALIGN_SSE int indices[kMaxNumDataPoints]; - - // Compress this cluster - totalErr += CompressCluster(clusters[cidx], p1[cidx], p2[cidx], (__m128i *)indices, bestPbitCombo[cidx]); - - // !SPEED! We can precompute the subsets for each index based on the shape. This - // isn't the bottleneck for the compressor, but it could prove to be a little - // faster... - - // Map the indices to their proper position. - int idx = 0; - for(int i = 0; i < 16; i++) { - int subs = GetSubsetForIndex(i, shapeIdx); - if(subs == cidx) { - bestIndices[cidx][i] = indices[idx++]; - } - } - } + ClampEndpointsToGrid(p1, p2, bestPbitCombo); #ifdef _DEBUG - for(int i = 0; i < kMaxNumDataPoints; i++) { + int pBitCombo = bestPbitCombo; + RGBAVectorSIMD tp1 = p1, tp2 = p2; + ClampEndpointsToGrid(tp1, tp2, pBitCombo); - int nSet = 0; - for(int j = 0; j < nSubsets; j++) { - if(bestIndices[j][i] >= 0) - nSet++; - } - - assert(nSet == 1); - } + assert(p1 == tp1); + assert(p2 == tp2); + assert(pBitCombo == bestPbitCombo); #endif - // Get the quantization mask - __m128i qmask; - GetQuantizationMask(qmask); + assert(bestPbitCombo >= 0); - //Quantize the points... - __m128i pixel1[kMaxNumSubsets], pixel2[kMaxNumSubsets]; - for(int i = 0; i < nSubsets; i++) { - switch(GetPBitType()) { - default: - case ePBitType_None: - pixel1[i] = p1[i].ToPixel(qmask); - pixel2[i] = p2[i].ToPixel(qmask); - break; + return OptimizeEndpointsForCluster(cluster, p1, p2, bestIndices, bestPbitCombo); +} - case ePBitType_Shared: - case ePBitType_NotShared: - pixel1[i] = p1[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[0]); - pixel2[i] = p2[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[1]); - break; - } - } +double BC7CompressionModeSIMD::Compress(BitStream &stream, const int shapeIdx, const RGBAClusterSIMD *clusters) const { - // If the anchor index does not have 0 in the leading bit, then - // we need to swap EVERYTHING. - for(int sidx = 0; sidx < nSubsets; sidx++) { + const int kModeNumber = GetModeNumber(); + const int nPartitionBits = GetNumberOfPartitionBits(); + const int nSubsets = GetNumberOfSubsets(); - int anchorIdx = GetAnchorIndexForSubset(sidx, shapeIdx); - assert(bestIndices[sidx][anchorIdx] != -1); + // Mode # + stream.WriteBits(1 << kModeNumber, kModeNumber + 1); - int nIndexBits = GetNumberOfBitsPerIndex(); - if(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)) { - __m128i t = pixel1[sidx]; pixel1[sidx] = pixel2[sidx]; pixel2[sidx] = t; + // Partition # + assert((((1 << nPartitionBits) - 1) & shapeIdx) == shapeIdx); + stream.WriteBits(shapeIdx, nPartitionBits); + + RGBAVectorSIMD p1[kMaxNumSubsets], p2[kMaxNumSubsets]; + int bestIndices[kMaxNumSubsets][kMaxNumDataPoints] = { + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } + }; + int bestPbitCombo[kMaxNumSubsets] = { -1, -1, -1 }; - int nIndexVals = 1 << nIndexBits; - for(int i = 0; i < 16; i++) { - bestIndices[sidx][i] = (nIndexVals - 1) - bestIndices[sidx][i]; - } - } + double totalErr = 0.0; + for(int cidx = 0; cidx < nSubsets; cidx++) { + ALIGN_SSE int indices[kMaxNumDataPoints]; - assert(!(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1))); - } + // Compress this cluster + totalErr += CompressCluster(clusters[cidx], p1[cidx], p2[cidx], (__m128i *)indices, bestPbitCombo[cidx]); - // Get the quantized values... - uint8 r1[kMaxNumSubsets], g1[kMaxNumSubsets], b1[kMaxNumSubsets], a1[kMaxNumSubsets]; - uint8 r2[kMaxNumSubsets], g2[kMaxNumSubsets], b2[kMaxNumSubsets], a2[kMaxNumSubsets]; - for(int i = 0; i < nSubsets; i++) { - r1[i] = ((uint8 *)(&(pixel1[i])))[0]; - r2[i] = ((uint8 *)(&(pixel2[i])))[0]; + // !SPEED! We can precompute the subsets for each index based on the shape. This + // isn't the bottleneck for the compressor, but it could prove to be a little + // faster... - g1[i] = ((uint8 *)(&(pixel1[i])))[4]; - g2[i] = ((uint8 *)(&(pixel2[i])))[4]; + // Map the indices to their proper position. + int idx = 0; + for(int i = 0; i < 16; i++) { + int subs = GetSubsetForIndex(i, shapeIdx); + if(subs == cidx) { + bestIndices[cidx][i] = indices[idx++]; + } + } + } - b1[i] = ((uint8 *)(&(pixel1[i])))[8]; - b2[i] = ((uint8 *)(&(pixel2[i])))[8]; +#ifdef _DEBUG + for(int i = 0; i < kMaxNumDataPoints; i++) { - a1[i] = ((uint8 *)(&(pixel1[i])))[12]; - a2[i] = ((uint8 *)(&(pixel2[i])))[12]; - } + int nSet = 0; + for(int j = 0; j < nSubsets; j++) { + if(bestIndices[j][i] >= 0) + nSet++; + } - // Write them out... - const int nRedBits = GetRedChannelPrecision(); - for(int i = 0; i < nSubsets; i++) { - stream.WriteBits(r1[i] >> (8 - nRedBits), nRedBits); - stream.WriteBits(r2[i] >> (8 - nRedBits), nRedBits); - } + assert(nSet == 1); + } +#endif - const int nGreenBits = GetGreenChannelPrecision(); - for(int i = 0; i < nSubsets; i++) { - stream.WriteBits(g1[i] >> (8 - nGreenBits), nGreenBits); - stream.WriteBits(g2[i] >> (8 - nGreenBits), nGreenBits); - } + // Get the quantization mask + __m128i qmask; + GetQuantizationMask(qmask); - const int nBlueBits = GetBlueChannelPrecision(); - for(int i = 0; i < nSubsets; i++) { - stream.WriteBits(b1[i] >> (8 - nBlueBits), nBlueBits); - stream.WriteBits(b2[i] >> (8 - nBlueBits), nBlueBits); - } + //Quantize the points... + __m128i pixel1[kMaxNumSubsets], pixel2[kMaxNumSubsets]; + for(int i = 0; i < nSubsets; i++) { + switch(GetPBitType()) { + default: + case ePBitType_None: + pixel1[i] = p1[i].ToPixel(qmask); + pixel2[i] = p2[i].ToPixel(qmask); + break; - const int nAlphaBits = GetAlphaChannelPrecision(); - for(int i = 0; i < nSubsets; i++) { - stream.WriteBits(a1[i] >> (8 - nAlphaBits), nAlphaBits); - stream.WriteBits(a2[i] >> (8 - nAlphaBits), nAlphaBits); - } + case ePBitType_Shared: + case ePBitType_NotShared: + pixel1[i] = p1[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[0]); + pixel2[i] = p2[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[1]); + break; + } + } - // Write out the best pbits.. - if(GetPBitType() != ePBitType_None) { - for(int s = 0; s < nSubsets; s++) { - const int *pbits = GetPBitCombo(bestPbitCombo[s]); - stream.WriteBits(pbits[0], 1); - if(GetPBitType() != ePBitType_Shared) - stream.WriteBits(pbits[1], 1); - } - } + // If the anchor index does not have 0 in the leading bit, then + // we need to swap EVERYTHING. + for(int sidx = 0; sidx < nSubsets; sidx++) { - for(int i = 0; i < 16; i++) { - const int subs = GetSubsetForIndex(i, shapeIdx); - const int idx = bestIndices[subs][i]; - const int anchorIdx = GetAnchorIndexForSubset(subs, shapeIdx); - const int nBitsForIdx = GetNumberOfBitsPerIndex(); - assert(idx >= 0 && idx < (1 << nBitsForIdx)); - assert(i != anchorIdx || !(idx >> (nBitsForIdx - 1)) || !"Leading bit of anchor index is not zero!"); - stream.WriteBits(idx, (i == anchorIdx)? nBitsForIdx - 1 : nBitsForIdx); - } + int anchorIdx = GetAnchorIndexForSubset(sidx, shapeIdx); + assert(bestIndices[sidx][anchorIdx] != -1); - assert(stream.GetBitsWritten() == 128); - return totalErr; + int nIndexBits = GetNumberOfBitsPerIndex(); + if(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)) { + __m128i t = pixel1[sidx]; pixel1[sidx] = pixel2[sidx]; pixel2[sidx] = t; + + int nIndexVals = 1 << nIndexBits; + for(int i = 0; i < 16; i++) { + bestIndices[sidx][i] = (nIndexVals - 1) - bestIndices[sidx][i]; + } + } + + assert(!(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1))); + } + + // Get the quantized values... + uint8 r1[kMaxNumSubsets], g1[kMaxNumSubsets], b1[kMaxNumSubsets], a1[kMaxNumSubsets]; + uint8 r2[kMaxNumSubsets], g2[kMaxNumSubsets], b2[kMaxNumSubsets], a2[kMaxNumSubsets]; + for(int i = 0; i < nSubsets; i++) { + r1[i] = ((uint8 *)(&(pixel1[i])))[0]; + r2[i] = ((uint8 *)(&(pixel2[i])))[0]; + + g1[i] = ((uint8 *)(&(pixel1[i])))[4]; + g2[i] = ((uint8 *)(&(pixel2[i])))[4]; + + b1[i] = ((uint8 *)(&(pixel1[i])))[8]; + b2[i] = ((uint8 *)(&(pixel2[i])))[8]; + + a1[i] = ((uint8 *)(&(pixel1[i])))[12]; + a2[i] = ((uint8 *)(&(pixel2[i])))[12]; + } + + // Write them out... + const int nRedBits = GetRedChannelPrecision(); + for(int i = 0; i < nSubsets; i++) { + stream.WriteBits(r1[i] >> (8 - nRedBits), nRedBits); + stream.WriteBits(r2[i] >> (8 - nRedBits), nRedBits); + } + + const int nGreenBits = GetGreenChannelPrecision(); + for(int i = 0; i < nSubsets; i++) { + stream.WriteBits(g1[i] >> (8 - nGreenBits), nGreenBits); + stream.WriteBits(g2[i] >> (8 - nGreenBits), nGreenBits); + } + + const int nBlueBits = GetBlueChannelPrecision(); + for(int i = 0; i < nSubsets; i++) { + stream.WriteBits(b1[i] >> (8 - nBlueBits), nBlueBits); + stream.WriteBits(b2[i] >> (8 - nBlueBits), nBlueBits); + } + + const int nAlphaBits = GetAlphaChannelPrecision(); + for(int i = 0; i < nSubsets; i++) { + stream.WriteBits(a1[i] >> (8 - nAlphaBits), nAlphaBits); + stream.WriteBits(a2[i] >> (8 - nAlphaBits), nAlphaBits); + } + + // Write out the best pbits.. + if(GetPBitType() != ePBitType_None) { + for(int s = 0; s < nSubsets; s++) { + const int *pbits = GetPBitCombo(bestPbitCombo[s]); + stream.WriteBits(pbits[0], 1); + if(GetPBitType() != ePBitType_Shared) + stream.WriteBits(pbits[1], 1); + } + } + + for(int i = 0; i < 16; i++) { + const int subs = GetSubsetForIndex(i, shapeIdx); + const int idx = bestIndices[subs][i]; + const int anchorIdx = GetAnchorIndexForSubset(subs, shapeIdx); + const int nBitsForIdx = GetNumberOfBitsPerIndex(); + assert(idx >= 0 && idx < (1 << nBitsForIdx)); + assert(i != anchorIdx || !(idx >> (nBitsForIdx - 1)) || !"Leading bit of anchor index is not zero!"); + stream.WriteBits(idx, (i == anchorIdx)? nBitsForIdx - 1 : nBitsForIdx); + } + + assert(stream.GetBitsWritten() == 128); + return totalErr; } namespace BC7C { - // Function prototypes - static void ExtractBlock(const uint8* inPtr, int width, uint32* colorBlock); - static void CompressBC7Block(const uint32 *block, uint8 *outBuf); - - // Returns true if the entire block is a single color. - static bool AllOneColor(const uint32 block[16]) { - const uint32 pixel = block[0]; - for(int i = 1; i < 16; i++) { - if( block[i] != pixel ) - return false; - } - - return true; - } - - // Write out a transparent block. - static void WriteTransparentBlock(BitStream &stream) { - // Use mode 6 - stream.WriteBits(1 << 6, 7); - stream.WriteBits(0, 128-7); - assert(stream.GetBitsWritten() == 128); - } - - // Compresses a single color optimally and outputs the result. - static void CompressOptimalColorBC7(uint32 pixel, BitStream &stream) { - - stream.WriteBits(1 << 5, 6); // Mode 5 - stream.WriteBits(0, 2); // No rotation bits. - - uint8 r = pixel & 0xFF; - uint8 g = (pixel >> 8) & 0xFF; - uint8 b = (pixel >> 16) & 0xFF; - uint8 a = (pixel >> 24) & 0xFF; - - // Red endpoints - stream.WriteBits(Optimal7CompressBC7Mode5[r][0], 7); - stream.WriteBits(Optimal7CompressBC7Mode5[r][1], 7); - - // Green endpoints - stream.WriteBits(Optimal7CompressBC7Mode5[g][0], 7); - stream.WriteBits(Optimal7CompressBC7Mode5[g][1], 7); - - // Blue endpoints - stream.WriteBits(Optimal7CompressBC7Mode5[b][0], 7); - stream.WriteBits(Optimal7CompressBC7Mode5[b][1], 7); - - // Alpha endpoints... are just the same. - stream.WriteBits(a, 8); - stream.WriteBits(a, 8); - - // Color indices are 1 for each pixel... - // Anchor index is 0, so 1 bit for the first pixel, then - // 01 for each following pixel giving the sequence of 31 bits: - // ...010101011 - stream.WriteBits(0xaaaaaaab, 31); - - // Alpha indices... - stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31); - } - - // Compress an image using BC7 compression. Use the inBuf parameter to point to an image in - // 4-byte RGBA format. The width and height parameters specify the size of the image in pixels. - // The buffer pointed to by outBuf should be large enough to store the compressed image. This - // implementation has an 4:1 compression ratio. - void CompressImageBC7SIMD(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height) - { - ALIGN_SSE uint32 block[16]; - - _MM_SET_ROUNDING_MODE( _MM_ROUND_TOWARD_ZERO ); - BC7CompressionModeSIMD::ResetNumUses(); - - BC7CompressionModeSIMD::MaxAnnealingIterations = GetQualityLevel(); - - for(int j = 0; j < height; j += 4) - { - for(int i = 0; i < width; i += 4) - { - CompressBC7Block((const uint32 *)inBuf, outBuf); - - outBuf += 16; - inBuf += 64; - } - } - } - - // Extract a 4 by 4 block of pixels from inPtr and store it in colorBlock. The width parameter - // specifies the size of the image in pixels. - static void ExtractBlock(const uint8* inPtr, int width, uint32* colorBlock) - { - // Compute the stride. - const int stride = width * 4; - - // Copy the first row of pixels from inPtr into colorBlock. - _mm_store_si128((__m128i*)colorBlock, _mm_load_si128((__m128i*)inPtr)); - inPtr += stride; - - // Copy the second row of pixels from inPtr into colorBlock. - _mm_store_si128((__m128i*)(colorBlock + 4), _mm_load_si128((__m128i*)inPtr)); - inPtr += stride; - - // Copy the third row of pixels from inPtr into colorBlock. - _mm_store_si128((__m128i*)(colorBlock + 8), _mm_load_si128((__m128i*)inPtr)); - inPtr += stride; - - // Copy the forth row of pixels from inPtr into colorBlock. - _mm_store_si128((__m128i*)(colorBlock + 12), _mm_load_si128((__m128i*)inPtr)); - } - - static double CompressTwoClusters(int shapeIdx, const RGBAClusterSIMD *clusters, uint8 *outBuf, double estimatedError) { - - uint8 tempBuf1[16]; - BitStream tmpStream1(tempBuf1, 128, 0); - BC7CompressionModeSIMD compressor1(1, estimatedError); - - double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters); - memcpy(outBuf, tempBuf1, 16); - if(bestError == 0.0) { - return 0.0; - } - - uint8 tempBuf3[16]; - BitStream tmpStream3(tempBuf3, 128, 0); - BC7CompressionModeSIMD compressor3(3, estimatedError); - - double error; - if((error = compressor3.Compress(tmpStream3, shapeIdx, clusters)) < bestError) { - bestError = error; - memcpy(outBuf, tempBuf3, 16); - if(bestError == 0.0) { - return 0.0; - } - } - - // Mode 3 offers more precision for RGB data. Mode 7 is really only if we have alpha. - //uint8 tempBuf7[16]; - //BitStream tmpStream7(tempBuf7, 128, 0); - //BC7CompressionModeSIMD compressor7(7, estimatedError); - //if((error = compressor7.Compress(tmpStream7, shapeIdx, clusters)) < bestError) { - // memcpy(outBuf, tempBuf7, 16); - // return error; - //} - - return bestError; - } - - static double CompressThreeClusters(int shapeIdx, const RGBAClusterSIMD *clusters, uint8 *outBuf, double estimatedError) { - - uint8 tempBuf0[16]; - BitStream tmpStream0(tempBuf0, 128, 0); - - uint8 tempBuf2[16]; - BitStream tmpStream2(tempBuf2, 128, 0); - - BC7CompressionModeSIMD compressor0(0, estimatedError); - BC7CompressionModeSIMD compressor2(2, estimatedError); - - double error, bestError = (shapeIdx < 16)? compressor0.Compress(tmpStream0, shapeIdx, clusters) : DBL_MAX; - memcpy(outBuf, tempBuf0, 16); - if(bestError == 0.0) { - return 0.0; - } - - if((error = compressor2.Compress(tmpStream2, shapeIdx, clusters)) < bestError) { - memcpy(outBuf, tempBuf2, 16); - return error; - } - - return bestError; - } - - static void PopulateTwoClustersForShape(const RGBAClusterSIMD &points, int shapeIdx, RGBAClusterSIMD *clusters) { - const uint16 shape = kShapeMask2[shapeIdx]; - for(int pt = 0; pt < kMaxNumDataPoints; pt++) { - - const RGBAVectorSIMD &p = points.GetPoint(pt); - - if((1 << pt) & shape) - clusters[1].AddPoint(p, pt); - else - clusters[0].AddPoint(p, pt); - } - - assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString())); - assert((clusters[0].GetPointBitString() ^ clusters[1].GetPointBitString()) == 0xFFFF); - assert((shape & clusters[1].GetPointBitString()) == shape); - } - - static void PopulateThreeClustersForShape(const RGBAClusterSIMD &points, int shapeIdx, RGBAClusterSIMD *clusters) { - for(int pt = 0; pt < kMaxNumDataPoints; pt++) { - - const RGBAVectorSIMD &p = points.GetPoint(pt); - - if((1 << pt) & kShapeMask3[shapeIdx][0]) { - if((1 << pt) & kShapeMask3[shapeIdx][1]) - clusters[2].AddPoint(p, pt); - else - clusters[1].AddPoint(p, pt); - } - else - clusters[0].AddPoint(p, pt); - } - - assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString())); - assert(!(clusters[2].GetPointBitString() & clusters[1].GetPointBitString())); - assert(!(clusters[0].GetPointBitString() & clusters[2].GetPointBitString())); - } - - static double EstimateTwoClusterError(RGBAClusterSIMD &c) { - RGBAVectorSIMD Min, Max, v; - c.GetBoundingBox(Min, Max); - v = Max - Min; - if(v * v == 0) { - return 0.0; - } - - return 0.0001 + c.QuantizedError(Min, Max, 8, _mm_set1_epi32(0xFF)); - } - - static double EstimateThreeClusterError(RGBAClusterSIMD &c) { - RGBAVectorSIMD Min, Max, v; - c.GetBoundingBox(Min, Max); - v = Max - Min; - if(v * v == 0) { - return 0.0; - } - - return 0.0001 + c.QuantizedError(Min, Max, 4, _mm_set1_epi32(0xFF)); - } - - // Compress a single block. - void CompressBC7Block(const uint32 *block, uint8 *outBuf) { - - // All a single color? - if(AllOneColor(block)) { - BitStream bStrm(outBuf, 128, 0); - CompressOptimalColorBC7(*((const uint32 *)block), bStrm); - return; - } - - RGBAClusterSIMD blockCluster; - bool opaque = true; - bool transparent = true; - - for(int i = 0; i < kMaxNumDataPoints; i++) { - RGBAVectorSIMD p = RGBAVectorSIMD(block[i]); - blockCluster.AddPoint(p, i); - if(fabs(p.a - 255.0f) > 1e-10) - opaque = false; - - if(p.a > 0.0f) - transparent = false; - } - - // The whole block is transparent? - if(transparent) { - BitStream bStrm(outBuf, 128, 0); - WriteTransparentBlock(bStrm); - return; - } - - // First we must figure out which shape to use. To do this, simply - // see which shape has the smallest sum of minimum bounding spheres. - double bestError[2] = { DBL_MAX, DBL_MAX }; - int bestShapeIdx[2] = { -1, -1 }; - RGBAClusterSIMD bestClusters[2][3]; - - for(int i = 0; i < kNumShapes2; i++) - { - RGBAClusterSIMD clusters[2]; - PopulateTwoClustersForShape(blockCluster, i, clusters); - - double err = 0.0; - for(int ci = 0; ci < 2; ci++) { - err += EstimateTwoClusterError(clusters[ci]); - } - - // If it's small, we'll take it! - if(err < 1e-9) { - CompressTwoClusters(i, clusters, outBuf, err); - return; - } - - if(err < bestError[0]) { - bestError[0] = err; - bestShapeIdx[0] = i; - bestClusters[0][0] = clusters[0]; - bestClusters[0][1] = clusters[1]; - } - } - - // There are not 3 subset blocks that support alpha... - if(opaque) { - for(int i = 0; i < kNumShapes3; i++) { - - RGBAClusterSIMD clusters[3]; - PopulateThreeClustersForShape(blockCluster, i, clusters); - - double err = 0.0; - for(int ci = 0; ci < 3; ci++) { - err += EstimateThreeClusterError(clusters[ci]); - } - - // If it's small, we'll take it! - if(err < 1e-9) { - CompressThreeClusters(i, clusters, outBuf, err); - return; - } - - if(err < bestError[1]) { - bestError[1] = err; - bestShapeIdx[1] = i; - bestClusters[1][0] = clusters[0]; - bestClusters[1][1] = clusters[1]; - bestClusters[1][2] = clusters[2]; - } - } - } - - if(opaque) { - - uint8 tempBuf1[16]; - uint8 tempBuf2[16]; - - BitStream tempStream1 (tempBuf1, 128, 0); - BC7CompressionModeSIMD compressor(6, DBL_MAX); - double best = compressor.Compress(tempStream1, 0, &blockCluster); - if(best == 0.0f) { - memcpy(outBuf, tempBuf1, 16); - return; - } - - double error = DBL_MAX; - if((error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, bestError[0])) < best) { - best = error; - if(error == 0.0f) { - memcpy(outBuf, tempBuf2, 16); - return; - } - else { - memcpy(tempBuf1, tempBuf2, 16); - } - } - - if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, bestError[1]) < best) { - memcpy(outBuf, tempBuf2, 16); - return; - } - - memcpy(outBuf, tempBuf1, 16); - } - else { - assert(!"Don't support alpha yet!"); - } - } + // Function prototypes + static void ExtractBlock(const uint8* inPtr, int width, uint32* colorBlock); + static void CompressBC7Block(const uint32 *block, uint8 *outBuf); + + // Returns true if the entire block is a single color. + static bool AllOneColor(const uint32 block[16]) { + const uint32 pixel = block[0]; + for(int i = 1; i < 16; i++) { + if( block[i] != pixel ) + return false; + } + + return true; + } + + // Write out a transparent block. + static void WriteTransparentBlock(BitStream &stream) { + // Use mode 6 + stream.WriteBits(1 << 6, 7); + stream.WriteBits(0, 128-7); + assert(stream.GetBitsWritten() == 128); + } + + // Compresses a single color optimally and outputs the result. + static void CompressOptimalColorBC7(uint32 pixel, BitStream &stream) { + + stream.WriteBits(1 << 5, 6); // Mode 5 + stream.WriteBits(0, 2); // No rotation bits. + + uint8 r = pixel & 0xFF; + uint8 g = (pixel >> 8) & 0xFF; + uint8 b = (pixel >> 16) & 0xFF; + uint8 a = (pixel >> 24) & 0xFF; + + // Red endpoints + stream.WriteBits(Optimal7CompressBC7Mode5[r][0], 7); + stream.WriteBits(Optimal7CompressBC7Mode5[r][1], 7); + + // Green endpoints + stream.WriteBits(Optimal7CompressBC7Mode5[g][0], 7); + stream.WriteBits(Optimal7CompressBC7Mode5[g][1], 7); + + // Blue endpoints + stream.WriteBits(Optimal7CompressBC7Mode5[b][0], 7); + stream.WriteBits(Optimal7CompressBC7Mode5[b][1], 7); + + // Alpha endpoints... are just the same. + stream.WriteBits(a, 8); + stream.WriteBits(a, 8); + + // Color indices are 1 for each pixel... + // Anchor index is 0, so 1 bit for the first pixel, then + // 01 for each following pixel giving the sequence of 31 bits: + // ...010101011 + stream.WriteBits(0xaaaaaaab, 31); + + // Alpha indices... + stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31); + } + + // Compress an image using BC7 compression. Use the inBuf parameter to point to an image in + // 4-byte RGBA format. The width and height parameters specify the size of the image in pixels. + // The buffer pointed to by outBuf should be large enough to store the compressed image. This + // implementation has an 4:1 compression ratio. + void CompressImageBC7SIMD(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height) + { + ALIGN_SSE uint32 block[16]; + + _MM_SET_ROUNDING_MODE( _MM_ROUND_TOWARD_ZERO ); + BC7CompressionModeSIMD::ResetNumUses(); + + BC7CompressionModeSIMD::MaxAnnealingIterations = GetQualityLevel(); + + for(int j = 0; j < height; j += 4) { + for(int i = 0; i < width; i += 4) { + CompressBC7Block((const uint32 *)inBuf, outBuf); + + outBuf += 16; + inBuf += 64; + } + } + } + + // Extract a 4 by 4 block of pixels from inPtr and store it in colorBlock. The width parameter + // specifies the size of the image in pixels. + static void ExtractBlock(const uint8* inPtr, int width, uint32* colorBlock) + { + // Compute the stride. + const int stride = width * 4; + + // Copy the first row of pixels from inPtr into colorBlock. + _mm_store_si128((__m128i*)colorBlock, _mm_load_si128((__m128i*)inPtr)); + inPtr += stride; + + // Copy the second row of pixels from inPtr into colorBlock. + _mm_store_si128((__m128i*)(colorBlock + 4), _mm_load_si128((__m128i*)inPtr)); + inPtr += stride; + + // Copy the third row of pixels from inPtr into colorBlock. + _mm_store_si128((__m128i*)(colorBlock + 8), _mm_load_si128((__m128i*)inPtr)); + inPtr += stride; + + // Copy the forth row of pixels from inPtr into colorBlock. + _mm_store_si128((__m128i*)(colorBlock + 12), _mm_load_si128((__m128i*)inPtr)); + } + + static double CompressTwoClusters(int shapeIdx, const RGBAClusterSIMD *clusters, uint8 *outBuf, double estimatedError) { + + uint8 tempBuf1[16]; + BitStream tmpStream1(tempBuf1, 128, 0); + BC7CompressionModeSIMD compressor1(1, estimatedError); + + double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters); + memcpy(outBuf, tempBuf1, 16); + if(bestError == 0.0) { + return 0.0; + } + + uint8 tempBuf3[16]; + BitStream tmpStream3(tempBuf3, 128, 0); + BC7CompressionModeSIMD compressor3(3, estimatedError); + + double error; + if((error = compressor3.Compress(tmpStream3, shapeIdx, clusters)) < bestError) { + bestError = error; + memcpy(outBuf, tempBuf3, 16); + if(bestError == 0.0) { + return 0.0; + } + } + + // Mode 3 offers more precision for RGB data. Mode 7 is really only if we have alpha. + //uint8 tempBuf7[16]; + //BitStream tmpStream7(tempBuf7, 128, 0); + //BC7CompressionModeSIMD compressor7(7, estimatedError); + //if((error = compressor7.Compress(tmpStream7, shapeIdx, clusters)) < bestError) { + // memcpy(outBuf, tempBuf7, 16); + // return error; + //} + + return bestError; + } + + static double CompressThreeClusters(int shapeIdx, const RGBAClusterSIMD *clusters, uint8 *outBuf, double estimatedError) { + + uint8 tempBuf0[16]; + BitStream tmpStream0(tempBuf0, 128, 0); + + uint8 tempBuf2[16]; + BitStream tmpStream2(tempBuf2, 128, 0); + + BC7CompressionModeSIMD compressor0(0, estimatedError); + BC7CompressionModeSIMD compressor2(2, estimatedError); + + double error, bestError = (shapeIdx < 16)? compressor0.Compress(tmpStream0, shapeIdx, clusters) : DBL_MAX; + memcpy(outBuf, tempBuf0, 16); + if(bestError == 0.0) { + return 0.0; + } + + if((error = compressor2.Compress(tmpStream2, shapeIdx, clusters)) < bestError) { + memcpy(outBuf, tempBuf2, 16); + return error; + } + + return bestError; + } + + static void PopulateTwoClustersForShape(const RGBAClusterSIMD &points, int shapeIdx, RGBAClusterSIMD *clusters) { + const uint16 shape = kShapeMask2[shapeIdx]; + for(int pt = 0; pt < kMaxNumDataPoints; pt++) { + + const RGBAVectorSIMD &p = points.GetPoint(pt); + + if((1 << pt) & shape) + clusters[1].AddPoint(p, pt); + else + clusters[0].AddPoint(p, pt); + } + + assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString())); + assert((clusters[0].GetPointBitString() ^ clusters[1].GetPointBitString()) == 0xFFFF); + assert((shape & clusters[1].GetPointBitString()) == shape); + } + + static void PopulateThreeClustersForShape(const RGBAClusterSIMD &points, int shapeIdx, RGBAClusterSIMD *clusters) { + for(int pt = 0; pt < kMaxNumDataPoints; pt++) { + + const RGBAVectorSIMD &p = points.GetPoint(pt); + + if((1 << pt) & kShapeMask3[shapeIdx][0]) { + if((1 << pt) & kShapeMask3[shapeIdx][1]) + clusters[2].AddPoint(p, pt); + else + clusters[1].AddPoint(p, pt); + } + else + clusters[0].AddPoint(p, pt); + } + + assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString())); + assert(!(clusters[2].GetPointBitString() & clusters[1].GetPointBitString())); + assert(!(clusters[0].GetPointBitString() & clusters[2].GetPointBitString())); + } + + static double EstimateTwoClusterError(RGBAClusterSIMD &c) { + RGBAVectorSIMD Min, Max, v; + c.GetBoundingBox(Min, Max); + v = Max - Min; + if(v * v == 0) { + return 0.0; + } + + return 0.0001 + c.QuantizedError(Min, Max, 8, _mm_set1_epi32(0xFF)); + } + + static double EstimateThreeClusterError(RGBAClusterSIMD &c) { + RGBAVectorSIMD Min, Max, v; + c.GetBoundingBox(Min, Max); + v = Max - Min; + if(v * v == 0) { + return 0.0; + } + + return 0.0001 + c.QuantizedError(Min, Max, 4, _mm_set1_epi32(0xFF)); + } + + // Compress a single block. + void CompressBC7Block(const uint32 *block, uint8 *outBuf) { + + // All a single color? + if(AllOneColor(block)) { + BitStream bStrm(outBuf, 128, 0); + CompressOptimalColorBC7(*((const uint32 *)block), bStrm); + return; + } + + RGBAClusterSIMD blockCluster; + bool opaque = true; + bool transparent = true; + + for(int i = 0; i < kMaxNumDataPoints; i++) { + RGBAVectorSIMD p = RGBAVectorSIMD(block[i]); + blockCluster.AddPoint(p, i); + if(fabs(p.a - 255.0f) > 1e-10) + opaque = false; + + if(p.a > 0.0f) + transparent = false; + } + + // The whole block is transparent? + if(transparent) { + BitStream bStrm(outBuf, 128, 0); + WriteTransparentBlock(bStrm); + return; + } + + // First we must figure out which shape to use. To do this, simply + // see which shape has the smallest sum of minimum bounding spheres. + double bestError[2] = { DBL_MAX, DBL_MAX }; + int bestShapeIdx[2] = { -1, -1 }; + RGBAClusterSIMD bestClusters[2][3]; + + for(int i = 0; i < kNumShapes2; i++) { + RGBAClusterSIMD clusters[2]; + PopulateTwoClustersForShape(blockCluster, i, clusters); + + double err = 0.0; + for(int ci = 0; ci < 2; ci++) { + err += EstimateTwoClusterError(clusters[ci]); + } + + // If it's small, we'll take it! + if(err < 1e-9) { + CompressTwoClusters(i, clusters, outBuf, err); + return; + } + + if(err < bestError[0]) { + bestError[0] = err; + bestShapeIdx[0] = i; + bestClusters[0][0] = clusters[0]; + bestClusters[0][1] = clusters[1]; + } + } + + // There are not 3 subset blocks that support alpha... + if(opaque) { + for(int i = 0; i < kNumShapes3; i++) { + + RGBAClusterSIMD clusters[3]; + PopulateThreeClustersForShape(blockCluster, i, clusters); + + double err = 0.0; + for(int ci = 0; ci < 3; ci++) { + err += EstimateThreeClusterError(clusters[ci]); + } + + // If it's small, we'll take it! + if(err < 1e-9) { + CompressThreeClusters(i, clusters, outBuf, err); + return; + } + + if(err < bestError[1]) { + bestError[1] = err; + bestShapeIdx[1] = i; + bestClusters[1][0] = clusters[0]; + bestClusters[1][1] = clusters[1]; + bestClusters[1][2] = clusters[2]; + } + } + } + + if(opaque) { + + uint8 tempBuf1[16]; + uint8 tempBuf2[16]; + + BitStream tempStream1 (tempBuf1, 128, 0); + BC7CompressionModeSIMD compressor(6, DBL_MAX); + double best = compressor.Compress(tempStream1, 0, &blockCluster); + if(best == 0.0f) { + memcpy(outBuf, tempBuf1, 16); + return; + } + + double error = DBL_MAX; + if((error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, bestError[0])) < best) { + best = error; + if(error == 0.0f) { + memcpy(outBuf, tempBuf2, 16); + return; + } + else { + memcpy(tempBuf1, tempBuf2, 16); + } + } + + if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, bestError[1]) < best) { + memcpy(outBuf, tempBuf2, 16); + return; + } + + memcpy(outBuf, tempBuf1, 16); + } + else { + assert(!"Don't support alpha yet!"); + } + } } diff --git a/BPTCEncoder/src/BCLookupTables.h b/BPTCEncoder/src/BCLookupTables.h index 55ed783..f28ca95 100755 --- a/BPTCEncoder/src/BCLookupTables.h +++ b/BPTCEncoder/src/BCLookupTables.h @@ -68,315 +68,315 @@ // tables to speed up this precision by allowing every value to be 1/3 of the way // between the two colors specified. /* - UINT nbits = 7; - UINT lastNum = -1; - UINT vals[255]; - UINT valIdx = 0; - for(UINT i = 0; i < 256; i++) { - UINT num = (i >> (8 - nbits)); - num <<= (8-nbits); - num |= i >> nbits; + UINT nbits = 7; + UINT lastNum = -1; + UINT vals[255]; + UINT valIdx = 0; + for(UINT i = 0; i < 256; i++) { + UINT num = (i >> (8 - nbits)); + num <<= (8-nbits); + num |= i >> nbits; - if(num != lastNum) { - lastNum = num; - vals[valIdx++] = num; - } - } + if(num != lastNum) { + lastNum = num; + vals[valIdx++] = num; + } + } - for(UINT i = 0; i < 256; i++) { + for(UINT i = 0; i < 256; i++) { - UINT mindist = 0xFFFFFFFF; - UINT minj = 0, mink = 0; + UINT mindist = 0xFFFFFFFF; + UINT minj = 0, mink = 0; - UINT tableEntry[2] = { 0, 0 }; + UINT tableEntry[2] = { 0, 0 }; - mindist = 0xFFFFFFFF; - minj = 0, mink = 0; + mindist = 0xFFFFFFFF; + minj = 0, mink = 0; - for(UINT j = 0; j < valIdx; j++) { - for(UINT k = 0; k < valIdx ; k++) { + for(UINT j = 0; j < valIdx; j++) { + for(UINT k = 0; k < valIdx ; k++) { - UINT combo = (43 * vals[j] + 21 * vals[k] + 32) >> 6; - UINT dist = ((i > combo) ? i - combo : combo - i); - if( dist < mindist ) - { - mindist = dist; - minj = j; - mink = k; - } - } - } + UINT combo = (43 * vals[j] + 21 * vals[k] + 32) >> 6; + UINT dist = ((i > combo) ? i - combo : combo - i); + if( dist < mindist ) + { + mindist = dist; + minj = j; + mink = k; + } + } + } - assert(mindist == 0); + assert(mindist == 0); - tableEntry[0] = vals[minj]; - tableEntry[1] = vals[mink]; + tableEntry[0] = vals[minj]; + tableEntry[1] = vals[mink]; - wchar_t tableEntryStr[256]; - swprintf(tableEntryStr, 256, L"{ 0x%02x, 0x%02x },\n", - tableEntry[0] >> (8 - nbits), - tableEntry[1] >> (8 - nbits) - ); - OutputDebugString(tableEntryStr); - } + wchar_t tableEntryStr[256]; + swprintf(tableEntryStr, 256, L"{ 0x%02x, 0x%02x },\n", + tableEntry[0] >> (8 - nbits), + tableEntry[1] >> (8 - nbits) + ); + OutputDebugString(tableEntryStr); + } */ static unsigned char Optimal7CompressBC7Mode5[256][2] = { - { 0x00, 0x00 }, - { 0x00, 0x01 }, - { 0x00, 0x03 }, - { 0x00, 0x04 }, - { 0x00, 0x06 }, - { 0x00, 0x07 }, - { 0x00, 0x09 }, - { 0x00, 0x0a }, - { 0x00, 0x0c }, - { 0x00, 0x0d }, - { 0x00, 0x0f }, - { 0x00, 0x10 }, - { 0x00, 0x12 }, - { 0x00, 0x14 }, - { 0x00, 0x15 }, - { 0x00, 0x17 }, - { 0x00, 0x18 }, - { 0x00, 0x1a }, - { 0x00, 0x1b }, - { 0x00, 0x1d }, - { 0x00, 0x1e }, - { 0x00, 0x20 }, - { 0x00, 0x21 }, - { 0x00, 0x23 }, - { 0x00, 0x24 }, - { 0x00, 0x26 }, - { 0x00, 0x27 }, - { 0x00, 0x29 }, - { 0x00, 0x2a }, - { 0x00, 0x2c }, - { 0x00, 0x2d }, - { 0x00, 0x2f }, - { 0x00, 0x30 }, - { 0x00, 0x32 }, - { 0x00, 0x34 }, - { 0x00, 0x35 }, - { 0x00, 0x37 }, - { 0x00, 0x38 }, - { 0x00, 0x3a }, - { 0x00, 0x3b }, - { 0x00, 0x3d }, - { 0x00, 0x3e }, - { 0x00, 0x40 }, - { 0x00, 0x41 }, - { 0x00, 0x42 }, - { 0x00, 0x44 }, - { 0x00, 0x45 }, - { 0x00, 0x47 }, - { 0x00, 0x48 }, - { 0x00, 0x4a }, - { 0x00, 0x4b }, - { 0x00, 0x4d }, - { 0x00, 0x4e }, - { 0x00, 0x50 }, - { 0x00, 0x52 }, - { 0x00, 0x53 }, - { 0x00, 0x55 }, - { 0x00, 0x56 }, - { 0x00, 0x58 }, - { 0x00, 0x59 }, - { 0x00, 0x5b }, - { 0x00, 0x5c }, - { 0x00, 0x5e }, - { 0x00, 0x5f }, - { 0x00, 0x61 }, - { 0x00, 0x62 }, - { 0x00, 0x64 }, - { 0x00, 0x65 }, - { 0x00, 0x67 }, - { 0x00, 0x68 }, - { 0x00, 0x6a }, - { 0x00, 0x6b }, - { 0x00, 0x6d }, - { 0x00, 0x6e }, - { 0x00, 0x70 }, - { 0x00, 0x72 }, - { 0x00, 0x73 }, - { 0x00, 0x75 }, - { 0x00, 0x76 }, - { 0x00, 0x78 }, - { 0x00, 0x79 }, - { 0x00, 0x7b }, - { 0x00, 0x7c }, - { 0x00, 0x7e }, - { 0x00, 0x7f }, - { 0x01, 0x7f }, - { 0x02, 0x7e }, - { 0x03, 0x7e }, - { 0x03, 0x7f }, - { 0x04, 0x7f }, - { 0x05, 0x7e }, - { 0x06, 0x7e }, - { 0x06, 0x7f }, - { 0x07, 0x7f }, - { 0x08, 0x7e }, - { 0x09, 0x7e }, - { 0x09, 0x7f }, - { 0x0a, 0x7f }, - { 0x0b, 0x7e }, - { 0x0c, 0x7e }, - { 0x0c, 0x7f }, - { 0x0d, 0x7f }, - { 0x0e, 0x7e }, - { 0x0f, 0x7d }, - { 0x0f, 0x7f }, - { 0x10, 0x7e }, - { 0x11, 0x7e }, - { 0x11, 0x7f }, - { 0x12, 0x7f }, - { 0x13, 0x7e }, - { 0x14, 0x7e }, - { 0x14, 0x7f }, - { 0x15, 0x7f }, - { 0x16, 0x7e }, - { 0x17, 0x7e }, - { 0x17, 0x7f }, - { 0x18, 0x7f }, - { 0x19, 0x7e }, - { 0x1a, 0x7e }, - { 0x1a, 0x7f }, - { 0x1b, 0x7f }, - { 0x1c, 0x7e }, - { 0x1d, 0x7e }, - { 0x1d, 0x7f }, - { 0x1e, 0x7f }, - { 0x1f, 0x7e }, - { 0x20, 0x7e }, - { 0x20, 0x7f }, - { 0x21, 0x7f }, - { 0x22, 0x7e }, - { 0x23, 0x7e }, - { 0x23, 0x7f }, - { 0x24, 0x7f }, - { 0x25, 0x7e }, - { 0x26, 0x7e }, - { 0x26, 0x7f }, - { 0x27, 0x7f }, - { 0x28, 0x7e }, - { 0x29, 0x7e }, - { 0x29, 0x7f }, - { 0x2a, 0x7f }, - { 0x2b, 0x7e }, - { 0x2c, 0x7e }, - { 0x2c, 0x7f }, - { 0x2d, 0x7f }, - { 0x2e, 0x7e }, - { 0x2f, 0x7d }, - { 0x2f, 0x7f }, - { 0x30, 0x7e }, - { 0x31, 0x7e }, - { 0x31, 0x7f }, - { 0x32, 0x7f }, - { 0x33, 0x7e }, - { 0x34, 0x7e }, - { 0x34, 0x7f }, - { 0x35, 0x7f }, - { 0x36, 0x7e }, - { 0x37, 0x7e }, - { 0x37, 0x7f }, - { 0x38, 0x7f }, - { 0x39, 0x7e }, - { 0x3a, 0x7e }, - { 0x3a, 0x7f }, - { 0x3b, 0x7f }, - { 0x3c, 0x7e }, - { 0x3d, 0x7e }, - { 0x3d, 0x7f }, - { 0x3e, 0x7f }, - { 0x3f, 0x7e }, - { 0x40, 0x7d }, - { 0x40, 0x7e }, - { 0x41, 0x7e }, - { 0x41, 0x7f }, - { 0x42, 0x7f }, - { 0x43, 0x7e }, - { 0x44, 0x7e }, - { 0x44, 0x7f }, - { 0x45, 0x7f }, - { 0x46, 0x7e }, - { 0x47, 0x7e }, - { 0x47, 0x7f }, - { 0x48, 0x7f }, - { 0x49, 0x7e }, - { 0x4a, 0x7e }, - { 0x4a, 0x7f }, - { 0x4b, 0x7f }, - { 0x4c, 0x7e }, - { 0x4d, 0x7d }, - { 0x4d, 0x7f }, - { 0x4e, 0x7e }, - { 0x4f, 0x7e }, - { 0x4f, 0x7f }, - { 0x50, 0x7f }, - { 0x51, 0x7e }, - { 0x52, 0x7e }, - { 0x52, 0x7f }, - { 0x53, 0x7f }, - { 0x54, 0x7e }, - { 0x55, 0x7e }, - { 0x55, 0x7f }, - { 0x56, 0x7f }, - { 0x57, 0x7e }, - { 0x58, 0x7e }, - { 0x58, 0x7f }, - { 0x59, 0x7f }, - { 0x5a, 0x7e }, - { 0x5b, 0x7e }, - { 0x5b, 0x7f }, - { 0x5c, 0x7f }, - { 0x5d, 0x7e }, - { 0x5e, 0x7e }, - { 0x5e, 0x7f }, - { 0x5f, 0x7f }, - { 0x60, 0x7e }, - { 0x61, 0x7e }, - { 0x61, 0x7f }, - { 0x62, 0x7f }, - { 0x63, 0x7e }, - { 0x64, 0x7e }, - { 0x64, 0x7f }, - { 0x65, 0x7f }, - { 0x66, 0x7e }, - { 0x67, 0x7e }, - { 0x67, 0x7f }, - { 0x68, 0x7f }, - { 0x69, 0x7e }, - { 0x6a, 0x7e }, - { 0x6a, 0x7f }, - { 0x6b, 0x7f }, - { 0x6c, 0x7e }, - { 0x6d, 0x7d }, - { 0x6d, 0x7f }, - { 0x6e, 0x7e }, - { 0x6f, 0x7e }, - { 0x6f, 0x7f }, - { 0x70, 0x7f }, - { 0x71, 0x7e }, - { 0x72, 0x7e }, - { 0x72, 0x7f }, - { 0x73, 0x7f }, - { 0x74, 0x7e }, - { 0x75, 0x7e }, - { 0x75, 0x7f }, - { 0x76, 0x7f }, - { 0x77, 0x7e }, - { 0x78, 0x7e }, - { 0x78, 0x7f }, - { 0x79, 0x7f }, - { 0x7a, 0x7e }, - { 0x7b, 0x7e }, - { 0x7b, 0x7f }, - { 0x7c, 0x7f }, - { 0x7d, 0x7e }, - { 0x7e, 0x7e }, - { 0x7e, 0x7f }, - { 0x7f, 0x7f } + { 0x00, 0x00 }, + { 0x00, 0x01 }, + { 0x00, 0x03 }, + { 0x00, 0x04 }, + { 0x00, 0x06 }, + { 0x00, 0x07 }, + { 0x00, 0x09 }, + { 0x00, 0x0a }, + { 0x00, 0x0c }, + { 0x00, 0x0d }, + { 0x00, 0x0f }, + { 0x00, 0x10 }, + { 0x00, 0x12 }, + { 0x00, 0x14 }, + { 0x00, 0x15 }, + { 0x00, 0x17 }, + { 0x00, 0x18 }, + { 0x00, 0x1a }, + { 0x00, 0x1b }, + { 0x00, 0x1d }, + { 0x00, 0x1e }, + { 0x00, 0x20 }, + { 0x00, 0x21 }, + { 0x00, 0x23 }, + { 0x00, 0x24 }, + { 0x00, 0x26 }, + { 0x00, 0x27 }, + { 0x00, 0x29 }, + { 0x00, 0x2a }, + { 0x00, 0x2c }, + { 0x00, 0x2d }, + { 0x00, 0x2f }, + { 0x00, 0x30 }, + { 0x00, 0x32 }, + { 0x00, 0x34 }, + { 0x00, 0x35 }, + { 0x00, 0x37 }, + { 0x00, 0x38 }, + { 0x00, 0x3a }, + { 0x00, 0x3b }, + { 0x00, 0x3d }, + { 0x00, 0x3e }, + { 0x00, 0x40 }, + { 0x00, 0x41 }, + { 0x00, 0x42 }, + { 0x00, 0x44 }, + { 0x00, 0x45 }, + { 0x00, 0x47 }, + { 0x00, 0x48 }, + { 0x00, 0x4a }, + { 0x00, 0x4b }, + { 0x00, 0x4d }, + { 0x00, 0x4e }, + { 0x00, 0x50 }, + { 0x00, 0x52 }, + { 0x00, 0x53 }, + { 0x00, 0x55 }, + { 0x00, 0x56 }, + { 0x00, 0x58 }, + { 0x00, 0x59 }, + { 0x00, 0x5b }, + { 0x00, 0x5c }, + { 0x00, 0x5e }, + { 0x00, 0x5f }, + { 0x00, 0x61 }, + { 0x00, 0x62 }, + { 0x00, 0x64 }, + { 0x00, 0x65 }, + { 0x00, 0x67 }, + { 0x00, 0x68 }, + { 0x00, 0x6a }, + { 0x00, 0x6b }, + { 0x00, 0x6d }, + { 0x00, 0x6e }, + { 0x00, 0x70 }, + { 0x00, 0x72 }, + { 0x00, 0x73 }, + { 0x00, 0x75 }, + { 0x00, 0x76 }, + { 0x00, 0x78 }, + { 0x00, 0x79 }, + { 0x00, 0x7b }, + { 0x00, 0x7c }, + { 0x00, 0x7e }, + { 0x00, 0x7f }, + { 0x01, 0x7f }, + { 0x02, 0x7e }, + { 0x03, 0x7e }, + { 0x03, 0x7f }, + { 0x04, 0x7f }, + { 0x05, 0x7e }, + { 0x06, 0x7e }, + { 0x06, 0x7f }, + { 0x07, 0x7f }, + { 0x08, 0x7e }, + { 0x09, 0x7e }, + { 0x09, 0x7f }, + { 0x0a, 0x7f }, + { 0x0b, 0x7e }, + { 0x0c, 0x7e }, + { 0x0c, 0x7f }, + { 0x0d, 0x7f }, + { 0x0e, 0x7e }, + { 0x0f, 0x7d }, + { 0x0f, 0x7f }, + { 0x10, 0x7e }, + { 0x11, 0x7e }, + { 0x11, 0x7f }, + { 0x12, 0x7f }, + { 0x13, 0x7e }, + { 0x14, 0x7e }, + { 0x14, 0x7f }, + { 0x15, 0x7f }, + { 0x16, 0x7e }, + { 0x17, 0x7e }, + { 0x17, 0x7f }, + { 0x18, 0x7f }, + { 0x19, 0x7e }, + { 0x1a, 0x7e }, + { 0x1a, 0x7f }, + { 0x1b, 0x7f }, + { 0x1c, 0x7e }, + { 0x1d, 0x7e }, + { 0x1d, 0x7f }, + { 0x1e, 0x7f }, + { 0x1f, 0x7e }, + { 0x20, 0x7e }, + { 0x20, 0x7f }, + { 0x21, 0x7f }, + { 0x22, 0x7e }, + { 0x23, 0x7e }, + { 0x23, 0x7f }, + { 0x24, 0x7f }, + { 0x25, 0x7e }, + { 0x26, 0x7e }, + { 0x26, 0x7f }, + { 0x27, 0x7f }, + { 0x28, 0x7e }, + { 0x29, 0x7e }, + { 0x29, 0x7f }, + { 0x2a, 0x7f }, + { 0x2b, 0x7e }, + { 0x2c, 0x7e }, + { 0x2c, 0x7f }, + { 0x2d, 0x7f }, + { 0x2e, 0x7e }, + { 0x2f, 0x7d }, + { 0x2f, 0x7f }, + { 0x30, 0x7e }, + { 0x31, 0x7e }, + { 0x31, 0x7f }, + { 0x32, 0x7f }, + { 0x33, 0x7e }, + { 0x34, 0x7e }, + { 0x34, 0x7f }, + { 0x35, 0x7f }, + { 0x36, 0x7e }, + { 0x37, 0x7e }, + { 0x37, 0x7f }, + { 0x38, 0x7f }, + { 0x39, 0x7e }, + { 0x3a, 0x7e }, + { 0x3a, 0x7f }, + { 0x3b, 0x7f }, + { 0x3c, 0x7e }, + { 0x3d, 0x7e }, + { 0x3d, 0x7f }, + { 0x3e, 0x7f }, + { 0x3f, 0x7e }, + { 0x40, 0x7d }, + { 0x40, 0x7e }, + { 0x41, 0x7e }, + { 0x41, 0x7f }, + { 0x42, 0x7f }, + { 0x43, 0x7e }, + { 0x44, 0x7e }, + { 0x44, 0x7f }, + { 0x45, 0x7f }, + { 0x46, 0x7e }, + { 0x47, 0x7e }, + { 0x47, 0x7f }, + { 0x48, 0x7f }, + { 0x49, 0x7e }, + { 0x4a, 0x7e }, + { 0x4a, 0x7f }, + { 0x4b, 0x7f }, + { 0x4c, 0x7e }, + { 0x4d, 0x7d }, + { 0x4d, 0x7f }, + { 0x4e, 0x7e }, + { 0x4f, 0x7e }, + { 0x4f, 0x7f }, + { 0x50, 0x7f }, + { 0x51, 0x7e }, + { 0x52, 0x7e }, + { 0x52, 0x7f }, + { 0x53, 0x7f }, + { 0x54, 0x7e }, + { 0x55, 0x7e }, + { 0x55, 0x7f }, + { 0x56, 0x7f }, + { 0x57, 0x7e }, + { 0x58, 0x7e }, + { 0x58, 0x7f }, + { 0x59, 0x7f }, + { 0x5a, 0x7e }, + { 0x5b, 0x7e }, + { 0x5b, 0x7f }, + { 0x5c, 0x7f }, + { 0x5d, 0x7e }, + { 0x5e, 0x7e }, + { 0x5e, 0x7f }, + { 0x5f, 0x7f }, + { 0x60, 0x7e }, + { 0x61, 0x7e }, + { 0x61, 0x7f }, + { 0x62, 0x7f }, + { 0x63, 0x7e }, + { 0x64, 0x7e }, + { 0x64, 0x7f }, + { 0x65, 0x7f }, + { 0x66, 0x7e }, + { 0x67, 0x7e }, + { 0x67, 0x7f }, + { 0x68, 0x7f }, + { 0x69, 0x7e }, + { 0x6a, 0x7e }, + { 0x6a, 0x7f }, + { 0x6b, 0x7f }, + { 0x6c, 0x7e }, + { 0x6d, 0x7d }, + { 0x6d, 0x7f }, + { 0x6e, 0x7e }, + { 0x6f, 0x7e }, + { 0x6f, 0x7f }, + { 0x70, 0x7f }, + { 0x71, 0x7e }, + { 0x72, 0x7e }, + { 0x72, 0x7f }, + { 0x73, 0x7f }, + { 0x74, 0x7e }, + { 0x75, 0x7e }, + { 0x75, 0x7f }, + { 0x76, 0x7f }, + { 0x77, 0x7e }, + { 0x78, 0x7e }, + { 0x78, 0x7f }, + { 0x79, 0x7f }, + { 0x7a, 0x7e }, + { 0x7b, 0x7e }, + { 0x7b, 0x7f }, + { 0x7c, 0x7f }, + { 0x7d, 0x7e }, + { 0x7e, 0x7e }, + { 0x7e, 0x7f }, + { 0x7f, 0x7f } }; // For each value, we give the best possible compression range for that value with 5 bits. @@ -389,605 +389,605 @@ static unsigned char Optimal7CompressBC7Mode5[256][2] = { // // The following tables were generated with the following program: /* - UINT nbits = 5; - UINT lastNum = -1; - UINT vals[255]; - UINT valIdx = 0; - for(UINT i = 0; i < 256; i++) { - UINT num = (i >> (8 - nbits)); - num <<= (8-nbits); - num |= i >> nbits; + UINT nbits = 5; + UINT lastNum = -1; + UINT vals[255]; + UINT valIdx = 0; + for(UINT i = 0; i < 256; i++) { + UINT num = (i >> (8 - nbits)); + num <<= (8-nbits); + num |= i >> nbits; - if(num != lastNum) { - lastNum = num; - vals[valIdx++] = num; - } - } + if(num != lastNum) { + lastNum = num; + vals[valIdx++] = num; + } + } - for(UINT i = 0; i < 256; i++) { + for(UINT i = 0; i < 256; i++) { - UINT mindist = 0xFFFFFFFF; - UINT minj = 0, mink = 0; + UINT mindist = 0xFFFFFFFF; + UINT minj = 0, mink = 0; - UINT tableEntry[2][4] = { {1, 0, 0, 0xFFFFFFFF}, {0, 0, 0, 0xFFFFFFFF} }; + UINT tableEntry[2][4] = { {1, 0, 0, 0xFFFFFFFF}, {0, 0, 0, 0xFFFFFFFF} }; - for(UINT j = 0; j < valIdx; j++) { - for(UINT k = j; k < valIdx ; k++) { + for(UINT j = 0; j < valIdx; j++) { + for(UINT k = j; k < valIdx ; k++) { - UINT combo = (vals[j] + vals[k]) / 2; - UINT dist = ((i > combo) ? i - combo : combo - i); - if( dist < mindist ) - { - mindist = dist; - minj = j; - mink = k; - } - } - } + UINT combo = (vals[j] + vals[k]) / 2; + UINT dist = ((i > combo) ? i - combo : combo - i); + if( dist < mindist ) + { + mindist = dist; + minj = j; + mink = k; + } + } + } - tableEntry[0][1] = vals[minj]; - tableEntry[0][2] = vals[mink]; - tableEntry[0][3] = mindist; + tableEntry[0][1] = vals[minj]; + tableEntry[0][2] = vals[mink]; + tableEntry[0][3] = mindist; - mindist = 0xFFFFFFFF; - minj = 0, mink = 0; + mindist = 0xFFFFFFFF; + minj = 0, mink = 0; - for(UINT j = 0; j < valIdx; j++) { - for(UINT k = j; k < valIdx ; k++) { + for(UINT j = 0; j < valIdx; j++) { + for(UINT k = j; k < valIdx ; k++) { - UINT combo = (2 * vals[j] + vals[k]) / 3; - UINT dist = ((i > combo) ? i - combo : combo - i); - if( dist < mindist ) - { - mindist = dist; - minj = j; - mink = k; - } - } - } + UINT combo = (2 * vals[j] + vals[k]) / 3; + UINT dist = ((i > combo) ? i - combo : combo - i); + if( dist < mindist ) + { + mindist = dist; + minj = j; + mink = k; + } + } + } - tableEntry[1][1] = vals[minj]; - tableEntry[1][2] = vals[mink]; - tableEntry[1][3] = mindist; + tableEntry[1][1] = vals[minj]; + tableEntry[1][2] = vals[mink]; + tableEntry[1][3] = mindist; - wchar_t tableEntryStr[256]; - if(tableEntry[1][3] > tableEntry[0][3]) { - swprintf(tableEntryStr, 256, L"{ { %d, 0x%02x, 0x%02x }, { %d, 0x%02x, 0x%02x } },\n", - tableEntry[0][0], - tableEntry[0][1] >> (8 - nbits), - tableEntry[0][2] >> (8 - nbits), - tableEntry[1][0], - tableEntry[1][1] >> (8 - nbits), - tableEntry[1][2] >> (8 - nbits) - ); - } - else { - swprintf(tableEntryStr, 256, L"{ { %d, 0x%02x, 0x%02x }, { %d, 0x%02x, 0x%02x } },\n", - tableEntry[1][0], - tableEntry[1][1] >> (8 - nbits), - tableEntry[1][2] >> (8 - nbits), - tableEntry[0][0], - tableEntry[0][1] >> (8 - nbits), - tableEntry[0][2] >> (8 - nbits) - ); - } - OutputDebugString(tableEntryStr); - } + wchar_t tableEntryStr[256]; + if(tableEntry[1][3] > tableEntry[0][3]) { + swprintf(tableEntryStr, 256, L"{ { %d, 0x%02x, 0x%02x }, { %d, 0x%02x, 0x%02x } },\n", + tableEntry[0][0], + tableEntry[0][1] >> (8 - nbits), + tableEntry[0][2] >> (8 - nbits), + tableEntry[1][0], + tableEntry[1][1] >> (8 - nbits), + tableEntry[1][2] >> (8 - nbits) + ); + } + else { + swprintf(tableEntryStr, 256, L"{ { %d, 0x%02x, 0x%02x }, { %d, 0x%02x, 0x%02x } },\n", + tableEntry[1][0], + tableEntry[1][1] >> (8 - nbits), + tableEntry[1][2] >> (8 - nbits), + tableEntry[0][0], + tableEntry[0][1] >> (8 - nbits), + tableEntry[0][2] >> (8 - nbits) + ); + } + OutputDebugString(tableEntryStr); + } static unsigned char Optimal5CompressDXT1[256][2][3] = { - { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } }, - { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } }, - { { 0, 0x00, 0x01 }, { 1, 0x00, 0x00 } }, - { { 0, 0x00, 0x01 }, { 1, 0x00, 0x01 } }, - { { 1, 0x00, 0x01 }, { 0, 0x00, 0x02 } }, - { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } }, - { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } }, - { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } }, - { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } }, - { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } }, - { { 0, 0x01, 0x02 }, { 1, 0x00, 0x02 } }, - { { 0, 0x00, 0x04 }, { 1, 0x00, 0x03 } }, - { { 1, 0x00, 0x03 }, { 0, 0x00, 0x04 } }, - { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } }, - { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } }, - { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } }, - { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } }, - { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } }, - { { 0, 0x02, 0x03 }, { 1, 0x00, 0x04 } }, - { { 0, 0x00, 0x07 }, { 1, 0x00, 0x05 } }, - { { 1, 0x00, 0x05 }, { 0, 0x00, 0x07 } }, - { { 0, 0x01, 0x06 }, { 1, 0x00, 0x05 } }, - { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } }, - { { 0, 0x00, 0x08 }, { 1, 0x00, 0x06 } }, - { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } }, - { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } }, - { { 0, 0x00, 0x0a }, { 1, 0x00, 0x06 } }, - { { 0, 0x00, 0x0a }, { 1, 0x00, 0x07 } }, - { { 1, 0x00, 0x07 }, { 0, 0x00, 0x0a } }, - { { 0, 0x02, 0x07 }, { 1, 0x00, 0x07 } }, - { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } }, - { { 0, 0x00, 0x0b }, { 1, 0x01, 0x07 } }, - { { 0, 0x01, 0x0a }, { 1, 0x01, 0x07 } }, - { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } }, - { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } }, - { { 0, 0x00, 0x0d }, { 1, 0x02, 0x07 } }, - { { 1, 0x02, 0x07 }, { 0, 0x00, 0x0d } }, - { { 1, 0x00, 0x09 }, { 0, 0x00, 0x0e } }, - { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } }, - { { 0, 0x00, 0x0e }, { 1, 0x03, 0x07 } }, - { { 0, 0x02, 0x0b }, { 1, 0x03, 0x07 } }, - { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } }, - { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } }, - { { 0, 0x01, 0x0e }, { 1, 0x00, 0x0a } }, - { { 0, 0x00, 0x10 }, { 1, 0x00, 0x0b } }, - { { 1, 0x00, 0x0b }, { 0, 0x00, 0x10 } }, - { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } }, - { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } }, - { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } }, - { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } }, - { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } }, - { { 0, 0x02, 0x0f }, { 1, 0x00, 0x0c } }, - { { 0, 0x00, 0x13 }, { 1, 0x00, 0x0d } }, - { { 1, 0x00, 0x0d }, { 0, 0x00, 0x13 } }, - { { 0, 0x01, 0x12 }, { 1, 0x00, 0x0d } }, - { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0d } }, - { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0e } }, - { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } }, - { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } }, - { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0e } }, - { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0f } }, - { { 1, 0x00, 0x0f }, { 0, 0x00, 0x16 } }, - { { 0, 0x02, 0x13 }, { 1, 0x00, 0x0f } }, - { { 0, 0x00, 0x17 }, { 1, 0x00, 0x0f } }, - { { 0, 0x00, 0x17 }, { 1, 0x01, 0x0f } }, - { { 0, 0x01, 0x16 }, { 1, 0x01, 0x0f } }, - { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } }, - { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } }, - { { 0, 0x00, 0x19 }, { 1, 0x02, 0x0f } }, - { { 1, 0x02, 0x0f }, { 0, 0x00, 0x19 } }, - { { 1, 0x00, 0x11 }, { 0, 0x00, 0x1a } }, - { { 0, 0x00, 0x1a }, { 1, 0x00, 0x11 } }, - { { 0, 0x00, 0x1a }, { 1, 0x03, 0x0f } }, - { { 0, 0x02, 0x17 }, { 1, 0x03, 0x0f } }, - { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } }, - { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } }, - { { 0, 0x01, 0x1a }, { 1, 0x00, 0x12 } }, - { { 0, 0x00, 0x1c }, { 1, 0x00, 0x13 } }, - { { 1, 0x00, 0x13 }, { 0, 0x00, 0x1c } }, - { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } }, - { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } }, - { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } }, - { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } }, - { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } }, - { { 0, 0x02, 0x1b }, { 1, 0x00, 0x14 } }, - { { 0, 0x00, 0x1f }, { 1, 0x00, 0x15 } }, - { { 1, 0x00, 0x15 }, { 0, 0x00, 0x1f } }, - { { 0, 0x01, 0x1e }, { 1, 0x00, 0x15 } }, - { { 0, 0x04, 0x18 }, { 1, 0x00, 0x15 } }, - { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } }, - { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } }, - { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } }, - { { 0, 0x02, 0x1e }, { 1, 0x00, 0x16 } }, - { { 0, 0x02, 0x1e }, { 1, 0x00, 0x17 } }, - { { 1, 0x00, 0x17 }, { 0, 0x02, 0x1e } }, - { { 0, 0x02, 0x1f }, { 1, 0x00, 0x17 } }, - { { 0, 0x04, 0x1b }, { 1, 0x00, 0x17 } }, - { { 0, 0x03, 0x1e }, { 1, 0x01, 0x17 } }, - { { 0, 0x03, 0x1e }, { 1, 0x01, 0x17 } }, - { { 0, 0x04, 0x1c }, { 1, 0x00, 0x18 } }, - { { 0, 0x03, 0x1f }, { 1, 0x00, 0x18 } }, - { { 0, 0x03, 0x1f }, { 1, 0x02, 0x17 } }, - { { 1, 0x02, 0x17 }, { 0, 0x03, 0x1f } }, - { { 1, 0x00, 0x19 }, { 0, 0x04, 0x1e } }, - { { 0, 0x04, 0x1e }, { 1, 0x00, 0x19 } }, - { { 0, 0x04, 0x1e }, { 1, 0x03, 0x17 } }, - { { 0, 0x06, 0x1b }, { 1, 0x03, 0x17 } }, - { { 0, 0x04, 0x1f }, { 1, 0x00, 0x1a } }, - { { 0, 0x04, 0x1f }, { 1, 0x00, 0x1a } }, - { { 0, 0x05, 0x1e }, { 1, 0x00, 0x1a } }, - { { 0, 0x08, 0x18 }, { 1, 0x00, 0x1b } }, - { { 1, 0x00, 0x1b }, { 0, 0x05, 0x1f } }, - { { 0, 0x05, 0x1f }, { 1, 0x00, 0x1b } }, - { { 0, 0x05, 0x1f }, { 1, 0x00, 0x1b } }, - { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } }, - { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } }, - { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } }, - { { 0, 0x06, 0x1f }, { 1, 0x00, 0x1c } }, - { { 0, 0x08, 0x1b }, { 1, 0x00, 0x1d } }, - { { 1, 0x00, 0x1d }, { 0, 0x07, 0x1e } }, - { { 0, 0x07, 0x1e }, { 1, 0x00, 0x1d } }, - { { 0, 0x08, 0x1c }, { 1, 0x00, 0x1d } }, - { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } }, - { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } }, - { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } }, - { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1e } }, - { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1f } }, - { { 1, 0x00, 0x1f }, { 0, 0x08, 0x1e } }, - { { 0, 0x0a, 0x1b }, { 1, 0x00, 0x1f } }, - { { 0, 0x08, 0x1f }, { 1, 0x00, 0x1f } }, - { { 0, 0x08, 0x1f }, { 1, 0x01, 0x1f } }, - { { 0, 0x09, 0x1e }, { 1, 0x01, 0x1f } }, - { { 0, 0x0c, 0x18 }, { 1, 0x04, 0x1c } }, - { { 0, 0x09, 0x1f }, { 1, 0x04, 0x1c } }, - { { 0, 0x09, 0x1f }, { 1, 0x02, 0x1f } }, - { { 1, 0x02, 0x1f }, { 0, 0x09, 0x1f } }, - { { 1, 0x04, 0x1d }, { 0, 0x0a, 0x1e } }, - { { 0, 0x0a, 0x1e }, { 1, 0x04, 0x1d } }, - { { 0, 0x0a, 0x1e }, { 1, 0x03, 0x1f } }, - { { 0, 0x0a, 0x1f }, { 1, 0x03, 0x1f } }, - { { 0, 0x0c, 0x1b }, { 1, 0x04, 0x1e } }, - { { 0, 0x0b, 0x1e }, { 1, 0x04, 0x1e } }, - { { 0, 0x0b, 0x1e }, { 1, 0x04, 0x1e } }, - { { 0, 0x0c, 0x1c }, { 1, 0x04, 0x1f } }, - { { 1, 0x04, 0x1f }, { 0, 0x0b, 0x1f } }, - { { 0, 0x0b, 0x1f }, { 1, 0x04, 0x1f } }, - { { 0, 0x0b, 0x1f }, { 1, 0x04, 0x1f } }, - { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } }, - { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } }, - { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } }, - { { 0, 0x0e, 0x1b }, { 1, 0x05, 0x1f } }, - { { 0, 0x0c, 0x1f }, { 1, 0x06, 0x1f } }, - { { 1, 0x06, 0x1f }, { 0, 0x0c, 0x1f } }, - { { 0, 0x0d, 0x1e }, { 1, 0x06, 0x1f } }, - { { 0, 0x10, 0x18 }, { 1, 0x06, 0x1f } }, - { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } }, - { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } }, - { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } }, - { { 0, 0x0e, 0x1e }, { 1, 0x07, 0x1f } }, - { { 0, 0x0e, 0x1e }, { 1, 0x08, 0x1f } }, - { { 1, 0x08, 0x1f }, { 0, 0x0e, 0x1e } }, - { { 0, 0x0e, 0x1f }, { 1, 0x08, 0x1f } }, - { { 0, 0x10, 0x1b }, { 1, 0x08, 0x1f } }, - { { 0, 0x0f, 0x1e }, { 1, 0x09, 0x1f } }, - { { 0, 0x0f, 0x1e }, { 1, 0x09, 0x1f } }, - { { 0, 0x10, 0x1c }, { 1, 0x0c, 0x1c } }, - { { 0, 0x0f, 0x1f }, { 1, 0x0c, 0x1c } }, - { { 0, 0x0f, 0x1f }, { 1, 0x0a, 0x1f } }, - { { 1, 0x0a, 0x1f }, { 0, 0x0f, 0x1f } }, - { { 1, 0x0c, 0x1d }, { 0, 0x10, 0x1e } }, - { { 0, 0x10, 0x1e }, { 1, 0x0c, 0x1d } }, - { { 0, 0x10, 0x1e }, { 1, 0x0b, 0x1f } }, - { { 0, 0x12, 0x1b }, { 1, 0x0b, 0x1f } }, - { { 0, 0x10, 0x1f }, { 1, 0x0c, 0x1e } }, - { { 0, 0x10, 0x1f }, { 1, 0x0c, 0x1e } }, - { { 0, 0x11, 0x1e }, { 1, 0x0c, 0x1e } }, - { { 0, 0x14, 0x18 }, { 1, 0x0c, 0x1f } }, - { { 1, 0x0c, 0x1f }, { 0, 0x11, 0x1f } }, - { { 0, 0x11, 0x1f }, { 1, 0x0c, 0x1f } }, - { { 0, 0x11, 0x1f }, { 1, 0x0c, 0x1f } }, - { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } }, - { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } }, - { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } }, - { { 0, 0x12, 0x1f }, { 1, 0x0d, 0x1f } }, - { { 0, 0x14, 0x1b }, { 1, 0x0e, 0x1f } }, - { { 1, 0x0e, 0x1f }, { 0, 0x13, 0x1e } }, - { { 0, 0x13, 0x1e }, { 1, 0x0e, 0x1f } }, - { { 0, 0x14, 0x1c }, { 1, 0x0e, 0x1f } }, - { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } }, - { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } }, - { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } }, - { { 0, 0x14, 0x1e }, { 1, 0x0f, 0x1f } }, - { { 0, 0x14, 0x1e }, { 1, 0x10, 0x1f } }, - { { 1, 0x10, 0x1f }, { 0, 0x14, 0x1e } }, - { { 0, 0x16, 0x1b }, { 1, 0x10, 0x1f } }, - { { 0, 0x14, 0x1f }, { 1, 0x10, 0x1f } }, - { { 0, 0x14, 0x1f }, { 1, 0x11, 0x1f } }, - { { 0, 0x15, 0x1e }, { 1, 0x11, 0x1f } }, - { { 0, 0x18, 0x18 }, { 1, 0x14, 0x1c } }, - { { 0, 0x15, 0x1f }, { 1, 0x14, 0x1c } }, - { { 0, 0x15, 0x1f }, { 1, 0x12, 0x1f } }, - { { 1, 0x12, 0x1f }, { 0, 0x15, 0x1f } }, - { { 1, 0x14, 0x1d }, { 0, 0x16, 0x1e } }, - { { 0, 0x16, 0x1e }, { 1, 0x14, 0x1d } }, - { { 0, 0x16, 0x1e }, { 1, 0x13, 0x1f } }, - { { 0, 0x16, 0x1f }, { 1, 0x13, 0x1f } }, - { { 0, 0x18, 0x1b }, { 1, 0x14, 0x1e } }, - { { 0, 0x17, 0x1e }, { 1, 0x14, 0x1e } }, - { { 0, 0x17, 0x1e }, { 1, 0x14, 0x1e } }, - { { 0, 0x18, 0x1c }, { 1, 0x14, 0x1f } }, - { { 1, 0x14, 0x1f }, { 0, 0x17, 0x1f } }, - { { 0, 0x17, 0x1f }, { 1, 0x14, 0x1f } }, - { { 0, 0x17, 0x1f }, { 1, 0x14, 0x1f } }, - { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } }, - { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } }, - { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } }, - { { 0, 0x1a, 0x1b }, { 1, 0x15, 0x1f } }, - { { 0, 0x18, 0x1f }, { 1, 0x16, 0x1f } }, - { { 1, 0x16, 0x1f }, { 0, 0x18, 0x1f } }, - { { 0, 0x19, 0x1e }, { 1, 0x16, 0x1f } }, - { { 0, 0x19, 0x1e }, { 1, 0x16, 0x1f } }, - { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } }, - { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } }, - { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } }, - { { 0, 0x1a, 0x1e }, { 1, 0x17, 0x1f } }, - { { 0, 0x1a, 0x1e }, { 1, 0x18, 0x1f } }, - { { 1, 0x18, 0x1f }, { 0, 0x1a, 0x1e } }, - { { 0, 0x1a, 0x1f }, { 1, 0x18, 0x1f } }, - { { 0, 0x1a, 0x1f }, { 1, 0x18, 0x1f } }, - { { 0, 0x1b, 0x1e }, { 1, 0x19, 0x1f } }, - { { 0, 0x1b, 0x1e }, { 1, 0x19, 0x1f } }, - { { 0, 0x1c, 0x1c }, { 1, 0x1c, 0x1c } }, - { { 0, 0x1b, 0x1f }, { 1, 0x1c, 0x1c } }, - { { 0, 0x1b, 0x1f }, { 1, 0x1a, 0x1f } }, - { { 1, 0x1a, 0x1f }, { 0, 0x1b, 0x1f } }, - { { 1, 0x1c, 0x1d }, { 0, 0x1c, 0x1e } }, - { { 0, 0x1c, 0x1e }, { 1, 0x1c, 0x1d } }, - { { 0, 0x1c, 0x1e }, { 1, 0x1b, 0x1f } }, - { { 1, 0x1b, 0x1f }, { 0, 0x1c, 0x1f } }, - { { 0, 0x1c, 0x1f }, { 1, 0x1c, 0x1e } }, - { { 0, 0x1c, 0x1f }, { 1, 0x1c, 0x1e } }, - { { 0, 0x1d, 0x1e }, { 1, 0x1c, 0x1e } }, - { { 0, 0x1d, 0x1e }, { 1, 0x1c, 0x1f } }, - { { 1, 0x1c, 0x1f }, { 0, 0x1d, 0x1f } }, - { { 0, 0x1d, 0x1f }, { 1, 0x1c, 0x1f } }, - { { 0, 0x1d, 0x1f }, { 1, 0x1c, 0x1f } }, - { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } }, - { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } }, - { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } }, - { { 0, 0x1e, 0x1f }, { 1, 0x1d, 0x1f } }, - { { 0, 0x1e, 0x1f }, { 1, 0x1e, 0x1f } }, - { { 1, 0x1e, 0x1f }, { 0, 0x1e, 0x1f } }, - { { 1, 0x1e, 0x1f }, { 0, 0x1e, 0x1f } }, - { { 0, 0x1f, 0x1f }, { 1, 0x1e, 0x1f } }, - { { 0, 0x1f, 0x1f }, { 1, 0x1f, 0x1f } }, - { { 0, 0x1f, 0x1f }, { 1, 0x1f, 0x1f } } + { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } }, + { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } }, + { { 0, 0x00, 0x01 }, { 1, 0x00, 0x00 } }, + { { 0, 0x00, 0x01 }, { 1, 0x00, 0x01 } }, + { { 1, 0x00, 0x01 }, { 0, 0x00, 0x02 } }, + { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } }, + { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } }, + { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } }, + { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } }, + { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } }, + { { 0, 0x01, 0x02 }, { 1, 0x00, 0x02 } }, + { { 0, 0x00, 0x04 }, { 1, 0x00, 0x03 } }, + { { 1, 0x00, 0x03 }, { 0, 0x00, 0x04 } }, + { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } }, + { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } }, + { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } }, + { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } }, + { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } }, + { { 0, 0x02, 0x03 }, { 1, 0x00, 0x04 } }, + { { 0, 0x00, 0x07 }, { 1, 0x00, 0x05 } }, + { { 1, 0x00, 0x05 }, { 0, 0x00, 0x07 } }, + { { 0, 0x01, 0x06 }, { 1, 0x00, 0x05 } }, + { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } }, + { { 0, 0x00, 0x08 }, { 1, 0x00, 0x06 } }, + { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } }, + { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } }, + { { 0, 0x00, 0x0a }, { 1, 0x00, 0x06 } }, + { { 0, 0x00, 0x0a }, { 1, 0x00, 0x07 } }, + { { 1, 0x00, 0x07 }, { 0, 0x00, 0x0a } }, + { { 0, 0x02, 0x07 }, { 1, 0x00, 0x07 } }, + { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } }, + { { 0, 0x00, 0x0b }, { 1, 0x01, 0x07 } }, + { { 0, 0x01, 0x0a }, { 1, 0x01, 0x07 } }, + { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } }, + { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } }, + { { 0, 0x00, 0x0d }, { 1, 0x02, 0x07 } }, + { { 1, 0x02, 0x07 }, { 0, 0x00, 0x0d } }, + { { 1, 0x00, 0x09 }, { 0, 0x00, 0x0e } }, + { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } }, + { { 0, 0x00, 0x0e }, { 1, 0x03, 0x07 } }, + { { 0, 0x02, 0x0b }, { 1, 0x03, 0x07 } }, + { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } }, + { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } }, + { { 0, 0x01, 0x0e }, { 1, 0x00, 0x0a } }, + { { 0, 0x00, 0x10 }, { 1, 0x00, 0x0b } }, + { { 1, 0x00, 0x0b }, { 0, 0x00, 0x10 } }, + { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } }, + { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } }, + { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } }, + { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } }, + { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } }, + { { 0, 0x02, 0x0f }, { 1, 0x00, 0x0c } }, + { { 0, 0x00, 0x13 }, { 1, 0x00, 0x0d } }, + { { 1, 0x00, 0x0d }, { 0, 0x00, 0x13 } }, + { { 0, 0x01, 0x12 }, { 1, 0x00, 0x0d } }, + { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0d } }, + { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0e } }, + { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } }, + { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } }, + { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0e } }, + { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0f } }, + { { 1, 0x00, 0x0f }, { 0, 0x00, 0x16 } }, + { { 0, 0x02, 0x13 }, { 1, 0x00, 0x0f } }, + { { 0, 0x00, 0x17 }, { 1, 0x00, 0x0f } }, + { { 0, 0x00, 0x17 }, { 1, 0x01, 0x0f } }, + { { 0, 0x01, 0x16 }, { 1, 0x01, 0x0f } }, + { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } }, + { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } }, + { { 0, 0x00, 0x19 }, { 1, 0x02, 0x0f } }, + { { 1, 0x02, 0x0f }, { 0, 0x00, 0x19 } }, + { { 1, 0x00, 0x11 }, { 0, 0x00, 0x1a } }, + { { 0, 0x00, 0x1a }, { 1, 0x00, 0x11 } }, + { { 0, 0x00, 0x1a }, { 1, 0x03, 0x0f } }, + { { 0, 0x02, 0x17 }, { 1, 0x03, 0x0f } }, + { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } }, + { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } }, + { { 0, 0x01, 0x1a }, { 1, 0x00, 0x12 } }, + { { 0, 0x00, 0x1c }, { 1, 0x00, 0x13 } }, + { { 1, 0x00, 0x13 }, { 0, 0x00, 0x1c } }, + { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } }, + { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } }, + { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } }, + { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } }, + { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } }, + { { 0, 0x02, 0x1b }, { 1, 0x00, 0x14 } }, + { { 0, 0x00, 0x1f }, { 1, 0x00, 0x15 } }, + { { 1, 0x00, 0x15 }, { 0, 0x00, 0x1f } }, + { { 0, 0x01, 0x1e }, { 1, 0x00, 0x15 } }, + { { 0, 0x04, 0x18 }, { 1, 0x00, 0x15 } }, + { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } }, + { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } }, + { { 0, 0x01, 0x1f }, { 1, 0x00, 0x16 } }, + { { 0, 0x02, 0x1e }, { 1, 0x00, 0x16 } }, + { { 0, 0x02, 0x1e }, { 1, 0x00, 0x17 } }, + { { 1, 0x00, 0x17 }, { 0, 0x02, 0x1e } }, + { { 0, 0x02, 0x1f }, { 1, 0x00, 0x17 } }, + { { 0, 0x04, 0x1b }, { 1, 0x00, 0x17 } }, + { { 0, 0x03, 0x1e }, { 1, 0x01, 0x17 } }, + { { 0, 0x03, 0x1e }, { 1, 0x01, 0x17 } }, + { { 0, 0x04, 0x1c }, { 1, 0x00, 0x18 } }, + { { 0, 0x03, 0x1f }, { 1, 0x00, 0x18 } }, + { { 0, 0x03, 0x1f }, { 1, 0x02, 0x17 } }, + { { 1, 0x02, 0x17 }, { 0, 0x03, 0x1f } }, + { { 1, 0x00, 0x19 }, { 0, 0x04, 0x1e } }, + { { 0, 0x04, 0x1e }, { 1, 0x00, 0x19 } }, + { { 0, 0x04, 0x1e }, { 1, 0x03, 0x17 } }, + { { 0, 0x06, 0x1b }, { 1, 0x03, 0x17 } }, + { { 0, 0x04, 0x1f }, { 1, 0x00, 0x1a } }, + { { 0, 0x04, 0x1f }, { 1, 0x00, 0x1a } }, + { { 0, 0x05, 0x1e }, { 1, 0x00, 0x1a } }, + { { 0, 0x08, 0x18 }, { 1, 0x00, 0x1b } }, + { { 1, 0x00, 0x1b }, { 0, 0x05, 0x1f } }, + { { 0, 0x05, 0x1f }, { 1, 0x00, 0x1b } }, + { { 0, 0x05, 0x1f }, { 1, 0x00, 0x1b } }, + { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } }, + { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } }, + { { 0, 0x06, 0x1e }, { 1, 0x00, 0x1c } }, + { { 0, 0x06, 0x1f }, { 1, 0x00, 0x1c } }, + { { 0, 0x08, 0x1b }, { 1, 0x00, 0x1d } }, + { { 1, 0x00, 0x1d }, { 0, 0x07, 0x1e } }, + { { 0, 0x07, 0x1e }, { 1, 0x00, 0x1d } }, + { { 0, 0x08, 0x1c }, { 1, 0x00, 0x1d } }, + { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } }, + { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } }, + { { 0, 0x07, 0x1f }, { 1, 0x00, 0x1e } }, + { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1e } }, + { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1f } }, + { { 1, 0x00, 0x1f }, { 0, 0x08, 0x1e } }, + { { 0, 0x0a, 0x1b }, { 1, 0x00, 0x1f } }, + { { 0, 0x08, 0x1f }, { 1, 0x00, 0x1f } }, + { { 0, 0x08, 0x1f }, { 1, 0x01, 0x1f } }, + { { 0, 0x09, 0x1e }, { 1, 0x01, 0x1f } }, + { { 0, 0x0c, 0x18 }, { 1, 0x04, 0x1c } }, + { { 0, 0x09, 0x1f }, { 1, 0x04, 0x1c } }, + { { 0, 0x09, 0x1f }, { 1, 0x02, 0x1f } }, + { { 1, 0x02, 0x1f }, { 0, 0x09, 0x1f } }, + { { 1, 0x04, 0x1d }, { 0, 0x0a, 0x1e } }, + { { 0, 0x0a, 0x1e }, { 1, 0x04, 0x1d } }, + { { 0, 0x0a, 0x1e }, { 1, 0x03, 0x1f } }, + { { 0, 0x0a, 0x1f }, { 1, 0x03, 0x1f } }, + { { 0, 0x0c, 0x1b }, { 1, 0x04, 0x1e } }, + { { 0, 0x0b, 0x1e }, { 1, 0x04, 0x1e } }, + { { 0, 0x0b, 0x1e }, { 1, 0x04, 0x1e } }, + { { 0, 0x0c, 0x1c }, { 1, 0x04, 0x1f } }, + { { 1, 0x04, 0x1f }, { 0, 0x0b, 0x1f } }, + { { 0, 0x0b, 0x1f }, { 1, 0x04, 0x1f } }, + { { 0, 0x0b, 0x1f }, { 1, 0x04, 0x1f } }, + { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } }, + { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } }, + { { 0, 0x0c, 0x1e }, { 1, 0x05, 0x1f } }, + { { 0, 0x0e, 0x1b }, { 1, 0x05, 0x1f } }, + { { 0, 0x0c, 0x1f }, { 1, 0x06, 0x1f } }, + { { 1, 0x06, 0x1f }, { 0, 0x0c, 0x1f } }, + { { 0, 0x0d, 0x1e }, { 1, 0x06, 0x1f } }, + { { 0, 0x10, 0x18 }, { 1, 0x06, 0x1f } }, + { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } }, + { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } }, + { { 0, 0x0d, 0x1f }, { 1, 0x07, 0x1f } }, + { { 0, 0x0e, 0x1e }, { 1, 0x07, 0x1f } }, + { { 0, 0x0e, 0x1e }, { 1, 0x08, 0x1f } }, + { { 1, 0x08, 0x1f }, { 0, 0x0e, 0x1e } }, + { { 0, 0x0e, 0x1f }, { 1, 0x08, 0x1f } }, + { { 0, 0x10, 0x1b }, { 1, 0x08, 0x1f } }, + { { 0, 0x0f, 0x1e }, { 1, 0x09, 0x1f } }, + { { 0, 0x0f, 0x1e }, { 1, 0x09, 0x1f } }, + { { 0, 0x10, 0x1c }, { 1, 0x0c, 0x1c } }, + { { 0, 0x0f, 0x1f }, { 1, 0x0c, 0x1c } }, + { { 0, 0x0f, 0x1f }, { 1, 0x0a, 0x1f } }, + { { 1, 0x0a, 0x1f }, { 0, 0x0f, 0x1f } }, + { { 1, 0x0c, 0x1d }, { 0, 0x10, 0x1e } }, + { { 0, 0x10, 0x1e }, { 1, 0x0c, 0x1d } }, + { { 0, 0x10, 0x1e }, { 1, 0x0b, 0x1f } }, + { { 0, 0x12, 0x1b }, { 1, 0x0b, 0x1f } }, + { { 0, 0x10, 0x1f }, { 1, 0x0c, 0x1e } }, + { { 0, 0x10, 0x1f }, { 1, 0x0c, 0x1e } }, + { { 0, 0x11, 0x1e }, { 1, 0x0c, 0x1e } }, + { { 0, 0x14, 0x18 }, { 1, 0x0c, 0x1f } }, + { { 1, 0x0c, 0x1f }, { 0, 0x11, 0x1f } }, + { { 0, 0x11, 0x1f }, { 1, 0x0c, 0x1f } }, + { { 0, 0x11, 0x1f }, { 1, 0x0c, 0x1f } }, + { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } }, + { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } }, + { { 0, 0x12, 0x1e }, { 1, 0x0d, 0x1f } }, + { { 0, 0x12, 0x1f }, { 1, 0x0d, 0x1f } }, + { { 0, 0x14, 0x1b }, { 1, 0x0e, 0x1f } }, + { { 1, 0x0e, 0x1f }, { 0, 0x13, 0x1e } }, + { { 0, 0x13, 0x1e }, { 1, 0x0e, 0x1f } }, + { { 0, 0x14, 0x1c }, { 1, 0x0e, 0x1f } }, + { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } }, + { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } }, + { { 0, 0x13, 0x1f }, { 1, 0x0f, 0x1f } }, + { { 0, 0x14, 0x1e }, { 1, 0x0f, 0x1f } }, + { { 0, 0x14, 0x1e }, { 1, 0x10, 0x1f } }, + { { 1, 0x10, 0x1f }, { 0, 0x14, 0x1e } }, + { { 0, 0x16, 0x1b }, { 1, 0x10, 0x1f } }, + { { 0, 0x14, 0x1f }, { 1, 0x10, 0x1f } }, + { { 0, 0x14, 0x1f }, { 1, 0x11, 0x1f } }, + { { 0, 0x15, 0x1e }, { 1, 0x11, 0x1f } }, + { { 0, 0x18, 0x18 }, { 1, 0x14, 0x1c } }, + { { 0, 0x15, 0x1f }, { 1, 0x14, 0x1c } }, + { { 0, 0x15, 0x1f }, { 1, 0x12, 0x1f } }, + { { 1, 0x12, 0x1f }, { 0, 0x15, 0x1f } }, + { { 1, 0x14, 0x1d }, { 0, 0x16, 0x1e } }, + { { 0, 0x16, 0x1e }, { 1, 0x14, 0x1d } }, + { { 0, 0x16, 0x1e }, { 1, 0x13, 0x1f } }, + { { 0, 0x16, 0x1f }, { 1, 0x13, 0x1f } }, + { { 0, 0x18, 0x1b }, { 1, 0x14, 0x1e } }, + { { 0, 0x17, 0x1e }, { 1, 0x14, 0x1e } }, + { { 0, 0x17, 0x1e }, { 1, 0x14, 0x1e } }, + { { 0, 0x18, 0x1c }, { 1, 0x14, 0x1f } }, + { { 1, 0x14, 0x1f }, { 0, 0x17, 0x1f } }, + { { 0, 0x17, 0x1f }, { 1, 0x14, 0x1f } }, + { { 0, 0x17, 0x1f }, { 1, 0x14, 0x1f } }, + { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } }, + { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } }, + { { 0, 0x18, 0x1e }, { 1, 0x15, 0x1f } }, + { { 0, 0x1a, 0x1b }, { 1, 0x15, 0x1f } }, + { { 0, 0x18, 0x1f }, { 1, 0x16, 0x1f } }, + { { 1, 0x16, 0x1f }, { 0, 0x18, 0x1f } }, + { { 0, 0x19, 0x1e }, { 1, 0x16, 0x1f } }, + { { 0, 0x19, 0x1e }, { 1, 0x16, 0x1f } }, + { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } }, + { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } }, + { { 0, 0x19, 0x1f }, { 1, 0x17, 0x1f } }, + { { 0, 0x1a, 0x1e }, { 1, 0x17, 0x1f } }, + { { 0, 0x1a, 0x1e }, { 1, 0x18, 0x1f } }, + { { 1, 0x18, 0x1f }, { 0, 0x1a, 0x1e } }, + { { 0, 0x1a, 0x1f }, { 1, 0x18, 0x1f } }, + { { 0, 0x1a, 0x1f }, { 1, 0x18, 0x1f } }, + { { 0, 0x1b, 0x1e }, { 1, 0x19, 0x1f } }, + { { 0, 0x1b, 0x1e }, { 1, 0x19, 0x1f } }, + { { 0, 0x1c, 0x1c }, { 1, 0x1c, 0x1c } }, + { { 0, 0x1b, 0x1f }, { 1, 0x1c, 0x1c } }, + { { 0, 0x1b, 0x1f }, { 1, 0x1a, 0x1f } }, + { { 1, 0x1a, 0x1f }, { 0, 0x1b, 0x1f } }, + { { 1, 0x1c, 0x1d }, { 0, 0x1c, 0x1e } }, + { { 0, 0x1c, 0x1e }, { 1, 0x1c, 0x1d } }, + { { 0, 0x1c, 0x1e }, { 1, 0x1b, 0x1f } }, + { { 1, 0x1b, 0x1f }, { 0, 0x1c, 0x1f } }, + { { 0, 0x1c, 0x1f }, { 1, 0x1c, 0x1e } }, + { { 0, 0x1c, 0x1f }, { 1, 0x1c, 0x1e } }, + { { 0, 0x1d, 0x1e }, { 1, 0x1c, 0x1e } }, + { { 0, 0x1d, 0x1e }, { 1, 0x1c, 0x1f } }, + { { 1, 0x1c, 0x1f }, { 0, 0x1d, 0x1f } }, + { { 0, 0x1d, 0x1f }, { 1, 0x1c, 0x1f } }, + { { 0, 0x1d, 0x1f }, { 1, 0x1c, 0x1f } }, + { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } }, + { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } }, + { { 0, 0x1e, 0x1e }, { 1, 0x1d, 0x1f } }, + { { 0, 0x1e, 0x1f }, { 1, 0x1d, 0x1f } }, + { { 0, 0x1e, 0x1f }, { 1, 0x1e, 0x1f } }, + { { 1, 0x1e, 0x1f }, { 0, 0x1e, 0x1f } }, + { { 1, 0x1e, 0x1f }, { 0, 0x1e, 0x1f } }, + { { 0, 0x1f, 0x1f }, { 1, 0x1e, 0x1f } }, + { { 0, 0x1f, 0x1f }, { 1, 0x1f, 0x1f } }, + { { 0, 0x1f, 0x1f }, { 1, 0x1f, 0x1f } } }; */ static unsigned char Optimal6CompressDXT1[256][2][3] = { - { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } }, - { { 0, 0x00, 0x01 }, { 1, 0x00, 0x00 } }, - { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } }, - { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } }, - { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } }, - { { 0, 0x00, 0x04 }, { 1, 0x00, 0x02 } }, - { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } }, - { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } }, - { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } }, - { { 0, 0x00, 0x07 }, { 1, 0x00, 0x04 } }, - { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } }, - { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } }, - { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } }, - { { 0, 0x00, 0x0a }, { 1, 0x00, 0x06 } }, - { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } }, - { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } }, - { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } }, - { { 0, 0x00, 0x0d }, { 1, 0x00, 0x08 } }, - { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } }, - { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } }, - { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } }, - { { 0, 0x00, 0x10 }, { 1, 0x00, 0x0a } }, - { { 0, 0x01, 0x0f }, { 1, 0x00, 0x0b } }, - { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } }, - { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } }, - { { 0, 0x00, 0x13 }, { 1, 0x00, 0x0c } }, - { { 0, 0x03, 0x0e }, { 1, 0x00, 0x0d } }, - { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0d } }, - { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } }, - { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0e } }, - { { 0, 0x04, 0x0f }, { 1, 0x00, 0x0f } }, - { { 0, 0x00, 0x17 }, { 1, 0x00, 0x0f } }, - { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } }, - { { 0, 0x00, 0x19 }, { 1, 0x00, 0x10 } }, - { { 0, 0x06, 0x0e }, { 1, 0x00, 0x11 } }, - { { 0, 0x00, 0x1a }, { 1, 0x00, 0x11 } }, - { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } }, - { { 0, 0x00, 0x1c }, { 1, 0x00, 0x12 } }, - { { 0, 0x07, 0x0f }, { 1, 0x00, 0x13 } }, - { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } }, - { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } }, - { { 0, 0x00, 0x1f }, { 1, 0x00, 0x14 } }, - { { 0, 0x09, 0x0e }, { 1, 0x00, 0x15 } }, - { { 0, 0x00, 0x20 }, { 1, 0x00, 0x15 } }, - { { 0, 0x00, 0x21 }, { 1, 0x00, 0x16 } }, - { { 0, 0x02, 0x1e }, { 1, 0x00, 0x16 } }, - { { 0, 0x00, 0x22 }, { 1, 0x00, 0x17 } }, - { { 0, 0x00, 0x23 }, { 1, 0x00, 0x17 } }, - { { 0, 0x00, 0x24 }, { 1, 0x00, 0x18 } }, - { { 0, 0x03, 0x1f }, { 1, 0x00, 0x18 } }, - { { 0, 0x00, 0x25 }, { 1, 0x00, 0x19 } }, - { { 0, 0x00, 0x26 }, { 1, 0x00, 0x19 } }, - { { 0, 0x00, 0x27 }, { 1, 0x00, 0x1a } }, - { { 0, 0x05, 0x1e }, { 1, 0x00, 0x1a } }, - { { 0, 0x00, 0x28 }, { 1, 0x00, 0x1b } }, - { { 0, 0x00, 0x29 }, { 1, 0x00, 0x1b } }, - { { 0, 0x00, 0x2a }, { 1, 0x00, 0x1c } }, - { { 0, 0x06, 0x1f }, { 1, 0x00, 0x1c } }, - { { 0, 0x00, 0x2b }, { 1, 0x00, 0x1d } }, - { { 0, 0x00, 0x2c }, { 1, 0x00, 0x1d } }, - { { 0, 0x00, 0x2d }, { 1, 0x00, 0x1e } }, - { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1e } }, - { { 0, 0x00, 0x2e }, { 1, 0x00, 0x1f } }, - { { 0, 0x00, 0x2f }, { 1, 0x00, 0x1f } }, - { { 0, 0x01, 0x2e }, { 1, 0x01, 0x1f } }, - { { 0, 0x00, 0x30 }, { 1, 0x00, 0x20 } }, - { { 0, 0x00, 0x31 }, { 1, 0x02, 0x1f } }, - { { 0, 0x00, 0x32 }, { 1, 0x00, 0x21 } }, - { { 0, 0x02, 0x2f }, { 1, 0x03, 0x1f } }, - { { 0, 0x00, 0x33 }, { 1, 0x00, 0x22 } }, - { { 0, 0x00, 0x34 }, { 1, 0x04, 0x1f } }, - { { 0, 0x00, 0x35 }, { 1, 0x00, 0x23 } }, - { { 0, 0x04, 0x2e }, { 1, 0x05, 0x1f } }, - { { 0, 0x00, 0x36 }, { 1, 0x00, 0x24 } }, - { { 0, 0x00, 0x37 }, { 1, 0x06, 0x1f } }, - { { 0, 0x00, 0x38 }, { 1, 0x00, 0x25 } }, - { { 0, 0x05, 0x2f }, { 1, 0x07, 0x1f } }, - { { 0, 0x00, 0x39 }, { 1, 0x00, 0x26 } }, - { { 0, 0x00, 0x3a }, { 1, 0x08, 0x1f } }, - { { 0, 0x00, 0x3b }, { 1, 0x00, 0x27 } }, - { { 0, 0x07, 0x2e }, { 1, 0x09, 0x1f } }, - { { 0, 0x00, 0x3c }, { 1, 0x00, 0x28 } }, - { { 0, 0x00, 0x3d }, { 1, 0x0a, 0x1f } }, - { { 0, 0x00, 0x3e }, { 1, 0x00, 0x29 } }, - { { 0, 0x08, 0x2f }, { 1, 0x0b, 0x1f } }, - { { 0, 0x00, 0x3f }, { 1, 0x00, 0x2a } }, - { { 0, 0x01, 0x3e }, { 1, 0x0c, 0x1f } }, - { { 0, 0x01, 0x3f }, { 1, 0x00, 0x2b } }, - { { 0, 0x0a, 0x2e }, { 1, 0x0d, 0x1f } }, - { { 0, 0x02, 0x3e }, { 1, 0x00, 0x2c } }, - { { 0, 0x02, 0x3f }, { 1, 0x0e, 0x1f } }, - { { 0, 0x03, 0x3e }, { 1, 0x00, 0x2d } }, - { { 0, 0x0b, 0x2f }, { 1, 0x0f, 0x1f } }, - { { 0, 0x03, 0x3f }, { 1, 0x00, 0x2e } }, - { { 0, 0x04, 0x3e }, { 1, 0x00, 0x2e } }, - { { 0, 0x04, 0x3f }, { 1, 0x00, 0x2f } }, - { { 0, 0x0d, 0x2e }, { 1, 0x00, 0x2f } }, - { { 0, 0x05, 0x3e }, { 1, 0x00, 0x30 } }, - { { 0, 0x05, 0x3f }, { 1, 0x00, 0x30 } }, - { { 0, 0x06, 0x3e }, { 1, 0x00, 0x31 } }, - { { 0, 0x0e, 0x2f }, { 1, 0x00, 0x31 } }, - { { 0, 0x06, 0x3f }, { 1, 0x00, 0x32 } }, - { { 0, 0x07, 0x3e }, { 1, 0x00, 0x32 } }, - { { 0, 0x07, 0x3f }, { 1, 0x00, 0x33 } }, - { { 0, 0x10, 0x2d }, { 1, 0x00, 0x33 } }, - { { 0, 0x08, 0x3e }, { 1, 0x00, 0x34 } }, - { { 0, 0x08, 0x3f }, { 1, 0x00, 0x34 } }, - { { 0, 0x09, 0x3e }, { 1, 0x00, 0x35 } }, - { { 0, 0x10, 0x30 }, { 1, 0x00, 0x35 } }, - { { 0, 0x09, 0x3f }, { 1, 0x00, 0x36 } }, - { { 0, 0x0a, 0x3e }, { 1, 0x00, 0x36 } }, - { { 0, 0x0a, 0x3f }, { 1, 0x00, 0x37 } }, - { { 0, 0x10, 0x33 }, { 1, 0x00, 0x37 } }, - { { 0, 0x0b, 0x3e }, { 1, 0x00, 0x38 } }, - { { 0, 0x0b, 0x3f }, { 1, 0x00, 0x38 } }, - { { 0, 0x0c, 0x3e }, { 1, 0x00, 0x39 } }, - { { 0, 0x10, 0x36 }, { 1, 0x00, 0x39 } }, - { { 0, 0x0c, 0x3f }, { 1, 0x00, 0x3a } }, - { { 0, 0x0d, 0x3e }, { 1, 0x00, 0x3a } }, - { { 0, 0x0d, 0x3f }, { 1, 0x00, 0x3b } }, - { { 0, 0x10, 0x39 }, { 1, 0x00, 0x3b } }, - { { 0, 0x0e, 0x3e }, { 1, 0x00, 0x3c } }, - { { 0, 0x0e, 0x3f }, { 1, 0x00, 0x3c } }, - { { 0, 0x0f, 0x3e }, { 1, 0x00, 0x3d } }, - { { 0, 0x10, 0x3c }, { 1, 0x00, 0x3d } }, - { { 0, 0x0f, 0x3f }, { 1, 0x00, 0x3e } }, - { { 0, 0x18, 0x2e }, { 1, 0x00, 0x3e } }, - { { 0, 0x10, 0x3e }, { 1, 0x00, 0x3f } }, - { { 0, 0x10, 0x3f }, { 1, 0x00, 0x3f } }, - { { 0, 0x11, 0x3e }, { 1, 0x01, 0x3f } }, - { { 0, 0x19, 0x2f }, { 1, 0x10, 0x30 } }, - { { 0, 0x11, 0x3f }, { 1, 0x02, 0x3f } }, - { { 0, 0x12, 0x3e }, { 1, 0x10, 0x31 } }, - { { 0, 0x12, 0x3f }, { 1, 0x03, 0x3f } }, - { { 0, 0x1b, 0x2e }, { 1, 0x10, 0x32 } }, - { { 0, 0x13, 0x3e }, { 1, 0x04, 0x3f } }, - { { 0, 0x13, 0x3f }, { 1, 0x10, 0x33 } }, - { { 0, 0x14, 0x3e }, { 1, 0x05, 0x3f } }, - { { 0, 0x1c, 0x2f }, { 1, 0x10, 0x34 } }, - { { 0, 0x14, 0x3f }, { 1, 0x06, 0x3f } }, - { { 0, 0x15, 0x3e }, { 1, 0x10, 0x35 } }, - { { 0, 0x15, 0x3f }, { 1, 0x07, 0x3f } }, - { { 0, 0x1e, 0x2e }, { 1, 0x10, 0x36 } }, - { { 0, 0x16, 0x3e }, { 1, 0x08, 0x3f } }, - { { 0, 0x16, 0x3f }, { 1, 0x10, 0x37 } }, - { { 0, 0x17, 0x3e }, { 1, 0x09, 0x3f } }, - { { 0, 0x1f, 0x2f }, { 1, 0x10, 0x38 } }, - { { 0, 0x17, 0x3f }, { 1, 0x0a, 0x3f } }, - { { 0, 0x18, 0x3e }, { 1, 0x10, 0x39 } }, - { { 0, 0x18, 0x3f }, { 1, 0x0b, 0x3f } }, - { { 0, 0x20, 0x2f }, { 1, 0x10, 0x3a } }, - { { 0, 0x19, 0x3e }, { 1, 0x0c, 0x3f } }, - { { 0, 0x19, 0x3f }, { 1, 0x10, 0x3b } }, - { { 0, 0x1a, 0x3e }, { 1, 0x0d, 0x3f } }, - { { 0, 0x20, 0x32 }, { 1, 0x10, 0x3c } }, - { { 0, 0x1a, 0x3f }, { 1, 0x0e, 0x3f } }, - { { 0, 0x1b, 0x3e }, { 1, 0x10, 0x3d } }, - { { 0, 0x1b, 0x3f }, { 1, 0x0f, 0x3f } }, - { { 0, 0x20, 0x35 }, { 1, 0x10, 0x3e } }, - { { 0, 0x1c, 0x3e }, { 1, 0x10, 0x3e } }, - { { 0, 0x1c, 0x3f }, { 1, 0x10, 0x3f } }, - { { 0, 0x1d, 0x3e }, { 1, 0x10, 0x3f } }, - { { 0, 0x20, 0x38 }, { 1, 0x11, 0x3f } }, - { { 0, 0x1d, 0x3f }, { 1, 0x11, 0x3f } }, - { { 0, 0x1e, 0x3e }, { 1, 0x12, 0x3f } }, - { { 0, 0x1e, 0x3f }, { 1, 0x12, 0x3f } }, - { { 0, 0x20, 0x3b }, { 1, 0x13, 0x3f } }, - { { 0, 0x1f, 0x3e }, { 1, 0x13, 0x3f } }, - { { 0, 0x1f, 0x3f }, { 1, 0x14, 0x3f } }, - { { 0, 0x20, 0x3d }, { 1, 0x14, 0x3f } }, - { { 0, 0x20, 0x3e }, { 1, 0x15, 0x3f } }, - { { 0, 0x20, 0x3f }, { 1, 0x15, 0x3f } }, - { { 0, 0x29, 0x2e }, { 1, 0x16, 0x3f } }, - { { 0, 0x21, 0x3e }, { 1, 0x16, 0x3f } }, - { { 0, 0x21, 0x3f }, { 1, 0x17, 0x3f } }, - { { 0, 0x22, 0x3e }, { 1, 0x17, 0x3f } }, - { { 0, 0x2a, 0x2f }, { 1, 0x18, 0x3f } }, - { { 0, 0x22, 0x3f }, { 1, 0x18, 0x3f } }, - { { 0, 0x23, 0x3e }, { 1, 0x19, 0x3f } }, - { { 0, 0x23, 0x3f }, { 1, 0x19, 0x3f } }, - { { 0, 0x2c, 0x2e }, { 1, 0x1a, 0x3f } }, - { { 0, 0x24, 0x3e }, { 1, 0x1a, 0x3f } }, - { { 0, 0x24, 0x3f }, { 1, 0x1b, 0x3f } }, - { { 0, 0x25, 0x3e }, { 1, 0x1b, 0x3f } }, - { { 0, 0x2d, 0x2f }, { 1, 0x1c, 0x3f } }, - { { 0, 0x25, 0x3f }, { 1, 0x1c, 0x3f } }, - { { 0, 0x26, 0x3e }, { 1, 0x1d, 0x3f } }, - { { 0, 0x26, 0x3f }, { 1, 0x1d, 0x3f } }, - { { 1, 0x1e, 0x3f }, { 0, 0x26, 0x3f } }, - { { 0, 0x27, 0x3e }, { 1, 0x1e, 0x3f } }, - { { 0, 0x27, 0x3f }, { 1, 0x1f, 0x3f } }, - { { 0, 0x28, 0x3e }, { 1, 0x1f, 0x3f } }, - { { 1, 0x20, 0x3f }, { 0, 0x28, 0x3e } }, - { { 0, 0x28, 0x3f }, { 1, 0x20, 0x3f } }, - { { 0, 0x29, 0x3e }, { 1, 0x21, 0x3f } }, - { { 0, 0x29, 0x3f }, { 1, 0x30, 0x30 } }, - { { 0, 0x30, 0x31 }, { 1, 0x22, 0x3f } }, - { { 0, 0x2a, 0x3e }, { 1, 0x30, 0x31 } }, - { { 0, 0x2a, 0x3f }, { 1, 0x23, 0x3f } }, - { { 0, 0x2b, 0x3e }, { 1, 0x30, 0x32 } }, - { { 0, 0x30, 0x34 }, { 1, 0x24, 0x3f } }, - { { 0, 0x2b, 0x3f }, { 1, 0x30, 0x33 } }, - { { 0, 0x2c, 0x3e }, { 1, 0x25, 0x3f } }, - { { 0, 0x2c, 0x3f }, { 1, 0x30, 0x34 } }, - { { 0, 0x30, 0x37 }, { 1, 0x26, 0x3f } }, - { { 0, 0x2d, 0x3e }, { 1, 0x30, 0x35 } }, - { { 0, 0x2d, 0x3f }, { 1, 0x27, 0x3f } }, - { { 0, 0x2e, 0x3e }, { 1, 0x30, 0x36 } }, - { { 0, 0x30, 0x3a }, { 1, 0x28, 0x3f } }, - { { 0, 0x2e, 0x3f }, { 1, 0x30, 0x37 } }, - { { 0, 0x2f, 0x3e }, { 1, 0x29, 0x3f } }, - { { 0, 0x2f, 0x3f }, { 1, 0x30, 0x38 } }, - { { 0, 0x30, 0x3d }, { 1, 0x2a, 0x3f } }, - { { 0, 0x30, 0x3e }, { 1, 0x30, 0x39 } }, - { { 1, 0x2b, 0x3f }, { 0, 0x30, 0x3e } }, - { { 0, 0x30, 0x3f }, { 1, 0x30, 0x3a } }, - { { 0, 0x31, 0x3e }, { 1, 0x2c, 0x3f } }, - { { 0, 0x31, 0x3f }, { 1, 0x30, 0x3b } }, - { { 1, 0x2d, 0x3f }, { 0, 0x31, 0x3f } }, - { { 0, 0x32, 0x3e }, { 1, 0x30, 0x3c } }, - { { 0, 0x32, 0x3f }, { 1, 0x2e, 0x3f } }, - { { 0, 0x33, 0x3e }, { 1, 0x30, 0x3d } }, - { { 1, 0x2f, 0x3f }, { 0, 0x33, 0x3e } }, - { { 0, 0x33, 0x3f }, { 1, 0x30, 0x3e } }, - { { 0, 0x34, 0x3e }, { 1, 0x30, 0x3e } }, - { { 0, 0x34, 0x3f }, { 1, 0x30, 0x3f } }, - { { 0, 0x34, 0x3f }, { 1, 0x30, 0x3f } }, - { { 0, 0x35, 0x3e }, { 1, 0x31, 0x3f } }, - { { 0, 0x35, 0x3f }, { 1, 0x31, 0x3f } }, - { { 0, 0x36, 0x3e }, { 1, 0x32, 0x3f } }, - { { 0, 0x36, 0x3e }, { 1, 0x32, 0x3f } }, - { { 0, 0x36, 0x3f }, { 1, 0x33, 0x3f } }, - { { 0, 0x37, 0x3e }, { 1, 0x33, 0x3f } }, - { { 0, 0x37, 0x3f }, { 1, 0x34, 0x3f } }, - { { 0, 0x37, 0x3f }, { 1, 0x34, 0x3f } }, - { { 0, 0x38, 0x3e }, { 1, 0x35, 0x3f } }, - { { 0, 0x38, 0x3f }, { 1, 0x35, 0x3f } }, - { { 0, 0x39, 0x3e }, { 1, 0x36, 0x3f } }, - { { 0, 0x39, 0x3e }, { 1, 0x36, 0x3f } }, - { { 0, 0x39, 0x3f }, { 1, 0x37, 0x3f } }, - { { 0, 0x3a, 0x3e }, { 1, 0x37, 0x3f } }, - { { 0, 0x3a, 0x3f }, { 1, 0x38, 0x3f } }, - { { 0, 0x3a, 0x3f }, { 1, 0x38, 0x3f } }, - { { 0, 0x3b, 0x3e }, { 1, 0x39, 0x3f } }, - { { 0, 0x3b, 0x3f }, { 1, 0x39, 0x3f } }, - { { 0, 0x3c, 0x3e }, { 1, 0x3a, 0x3f } }, - { { 0, 0x3c, 0x3e }, { 1, 0x3a, 0x3f } }, - { { 0, 0x3c, 0x3f }, { 1, 0x3b, 0x3f } }, - { { 0, 0x3d, 0x3e }, { 1, 0x3b, 0x3f } }, - { { 0, 0x3d, 0x3f }, { 1, 0x3c, 0x3f } }, - { { 0, 0x3d, 0x3f }, { 1, 0x3c, 0x3f } }, - { { 0, 0x3e, 0x3e }, { 1, 0x3d, 0x3f } }, - { { 0, 0x3e, 0x3f }, { 1, 0x3d, 0x3f } }, - { { 1, 0x3e, 0x3f }, { 0, 0x3e, 0x3f } }, - { { 0, 0x3f, 0x3f }, { 1, 0x3e, 0x3f } }, - { { 0, 0x3f, 0x3f }, { 1, 0x3f, 0x3f } } + { { 0, 0x00, 0x00 }, { 1, 0x00, 0x00 } }, + { { 0, 0x00, 0x01 }, { 1, 0x00, 0x00 } }, + { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } }, + { { 0, 0x00, 0x02 }, { 1, 0x00, 0x01 } }, + { { 0, 0x00, 0x03 }, { 1, 0x00, 0x02 } }, + { { 0, 0x00, 0x04 }, { 1, 0x00, 0x02 } }, + { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } }, + { { 0, 0x00, 0x05 }, { 1, 0x00, 0x03 } }, + { { 0, 0x00, 0x06 }, { 1, 0x00, 0x04 } }, + { { 0, 0x00, 0x07 }, { 1, 0x00, 0x04 } }, + { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } }, + { { 0, 0x00, 0x08 }, { 1, 0x00, 0x05 } }, + { { 0, 0x00, 0x09 }, { 1, 0x00, 0x06 } }, + { { 0, 0x00, 0x0a }, { 1, 0x00, 0x06 } }, + { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } }, + { { 0, 0x00, 0x0b }, { 1, 0x00, 0x07 } }, + { { 0, 0x00, 0x0c }, { 1, 0x00, 0x08 } }, + { { 0, 0x00, 0x0d }, { 1, 0x00, 0x08 } }, + { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } }, + { { 0, 0x00, 0x0e }, { 1, 0x00, 0x09 } }, + { { 0, 0x00, 0x0f }, { 1, 0x00, 0x0a } }, + { { 0, 0x00, 0x10 }, { 1, 0x00, 0x0a } }, + { { 0, 0x01, 0x0f }, { 1, 0x00, 0x0b } }, + { { 0, 0x00, 0x11 }, { 1, 0x00, 0x0b } }, + { { 0, 0x00, 0x12 }, { 1, 0x00, 0x0c } }, + { { 0, 0x00, 0x13 }, { 1, 0x00, 0x0c } }, + { { 0, 0x03, 0x0e }, { 1, 0x00, 0x0d } }, + { { 0, 0x00, 0x14 }, { 1, 0x00, 0x0d } }, + { { 0, 0x00, 0x15 }, { 1, 0x00, 0x0e } }, + { { 0, 0x00, 0x16 }, { 1, 0x00, 0x0e } }, + { { 0, 0x04, 0x0f }, { 1, 0x00, 0x0f } }, + { { 0, 0x00, 0x17 }, { 1, 0x00, 0x0f } }, + { { 0, 0x00, 0x18 }, { 1, 0x00, 0x10 } }, + { { 0, 0x00, 0x19 }, { 1, 0x00, 0x10 } }, + { { 0, 0x06, 0x0e }, { 1, 0x00, 0x11 } }, + { { 0, 0x00, 0x1a }, { 1, 0x00, 0x11 } }, + { { 0, 0x00, 0x1b }, { 1, 0x00, 0x12 } }, + { { 0, 0x00, 0x1c }, { 1, 0x00, 0x12 } }, + { { 0, 0x07, 0x0f }, { 1, 0x00, 0x13 } }, + { { 0, 0x00, 0x1d }, { 1, 0x00, 0x13 } }, + { { 0, 0x00, 0x1e }, { 1, 0x00, 0x14 } }, + { { 0, 0x00, 0x1f }, { 1, 0x00, 0x14 } }, + { { 0, 0x09, 0x0e }, { 1, 0x00, 0x15 } }, + { { 0, 0x00, 0x20 }, { 1, 0x00, 0x15 } }, + { { 0, 0x00, 0x21 }, { 1, 0x00, 0x16 } }, + { { 0, 0x02, 0x1e }, { 1, 0x00, 0x16 } }, + { { 0, 0x00, 0x22 }, { 1, 0x00, 0x17 } }, + { { 0, 0x00, 0x23 }, { 1, 0x00, 0x17 } }, + { { 0, 0x00, 0x24 }, { 1, 0x00, 0x18 } }, + { { 0, 0x03, 0x1f }, { 1, 0x00, 0x18 } }, + { { 0, 0x00, 0x25 }, { 1, 0x00, 0x19 } }, + { { 0, 0x00, 0x26 }, { 1, 0x00, 0x19 } }, + { { 0, 0x00, 0x27 }, { 1, 0x00, 0x1a } }, + { { 0, 0x05, 0x1e }, { 1, 0x00, 0x1a } }, + { { 0, 0x00, 0x28 }, { 1, 0x00, 0x1b } }, + { { 0, 0x00, 0x29 }, { 1, 0x00, 0x1b } }, + { { 0, 0x00, 0x2a }, { 1, 0x00, 0x1c } }, + { { 0, 0x06, 0x1f }, { 1, 0x00, 0x1c } }, + { { 0, 0x00, 0x2b }, { 1, 0x00, 0x1d } }, + { { 0, 0x00, 0x2c }, { 1, 0x00, 0x1d } }, + { { 0, 0x00, 0x2d }, { 1, 0x00, 0x1e } }, + { { 0, 0x08, 0x1e }, { 1, 0x00, 0x1e } }, + { { 0, 0x00, 0x2e }, { 1, 0x00, 0x1f } }, + { { 0, 0x00, 0x2f }, { 1, 0x00, 0x1f } }, + { { 0, 0x01, 0x2e }, { 1, 0x01, 0x1f } }, + { { 0, 0x00, 0x30 }, { 1, 0x00, 0x20 } }, + { { 0, 0x00, 0x31 }, { 1, 0x02, 0x1f } }, + { { 0, 0x00, 0x32 }, { 1, 0x00, 0x21 } }, + { { 0, 0x02, 0x2f }, { 1, 0x03, 0x1f } }, + { { 0, 0x00, 0x33 }, { 1, 0x00, 0x22 } }, + { { 0, 0x00, 0x34 }, { 1, 0x04, 0x1f } }, + { { 0, 0x00, 0x35 }, { 1, 0x00, 0x23 } }, + { { 0, 0x04, 0x2e }, { 1, 0x05, 0x1f } }, + { { 0, 0x00, 0x36 }, { 1, 0x00, 0x24 } }, + { { 0, 0x00, 0x37 }, { 1, 0x06, 0x1f } }, + { { 0, 0x00, 0x38 }, { 1, 0x00, 0x25 } }, + { { 0, 0x05, 0x2f }, { 1, 0x07, 0x1f } }, + { { 0, 0x00, 0x39 }, { 1, 0x00, 0x26 } }, + { { 0, 0x00, 0x3a }, { 1, 0x08, 0x1f } }, + { { 0, 0x00, 0x3b }, { 1, 0x00, 0x27 } }, + { { 0, 0x07, 0x2e }, { 1, 0x09, 0x1f } }, + { { 0, 0x00, 0x3c }, { 1, 0x00, 0x28 } }, + { { 0, 0x00, 0x3d }, { 1, 0x0a, 0x1f } }, + { { 0, 0x00, 0x3e }, { 1, 0x00, 0x29 } }, + { { 0, 0x08, 0x2f }, { 1, 0x0b, 0x1f } }, + { { 0, 0x00, 0x3f }, { 1, 0x00, 0x2a } }, + { { 0, 0x01, 0x3e }, { 1, 0x0c, 0x1f } }, + { { 0, 0x01, 0x3f }, { 1, 0x00, 0x2b } }, + { { 0, 0x0a, 0x2e }, { 1, 0x0d, 0x1f } }, + { { 0, 0x02, 0x3e }, { 1, 0x00, 0x2c } }, + { { 0, 0x02, 0x3f }, { 1, 0x0e, 0x1f } }, + { { 0, 0x03, 0x3e }, { 1, 0x00, 0x2d } }, + { { 0, 0x0b, 0x2f }, { 1, 0x0f, 0x1f } }, + { { 0, 0x03, 0x3f }, { 1, 0x00, 0x2e } }, + { { 0, 0x04, 0x3e }, { 1, 0x00, 0x2e } }, + { { 0, 0x04, 0x3f }, { 1, 0x00, 0x2f } }, + { { 0, 0x0d, 0x2e }, { 1, 0x00, 0x2f } }, + { { 0, 0x05, 0x3e }, { 1, 0x00, 0x30 } }, + { { 0, 0x05, 0x3f }, { 1, 0x00, 0x30 } }, + { { 0, 0x06, 0x3e }, { 1, 0x00, 0x31 } }, + { { 0, 0x0e, 0x2f }, { 1, 0x00, 0x31 } }, + { { 0, 0x06, 0x3f }, { 1, 0x00, 0x32 } }, + { { 0, 0x07, 0x3e }, { 1, 0x00, 0x32 } }, + { { 0, 0x07, 0x3f }, { 1, 0x00, 0x33 } }, + { { 0, 0x10, 0x2d }, { 1, 0x00, 0x33 } }, + { { 0, 0x08, 0x3e }, { 1, 0x00, 0x34 } }, + { { 0, 0x08, 0x3f }, { 1, 0x00, 0x34 } }, + { { 0, 0x09, 0x3e }, { 1, 0x00, 0x35 } }, + { { 0, 0x10, 0x30 }, { 1, 0x00, 0x35 } }, + { { 0, 0x09, 0x3f }, { 1, 0x00, 0x36 } }, + { { 0, 0x0a, 0x3e }, { 1, 0x00, 0x36 } }, + { { 0, 0x0a, 0x3f }, { 1, 0x00, 0x37 } }, + { { 0, 0x10, 0x33 }, { 1, 0x00, 0x37 } }, + { { 0, 0x0b, 0x3e }, { 1, 0x00, 0x38 } }, + { { 0, 0x0b, 0x3f }, { 1, 0x00, 0x38 } }, + { { 0, 0x0c, 0x3e }, { 1, 0x00, 0x39 } }, + { { 0, 0x10, 0x36 }, { 1, 0x00, 0x39 } }, + { { 0, 0x0c, 0x3f }, { 1, 0x00, 0x3a } }, + { { 0, 0x0d, 0x3e }, { 1, 0x00, 0x3a } }, + { { 0, 0x0d, 0x3f }, { 1, 0x00, 0x3b } }, + { { 0, 0x10, 0x39 }, { 1, 0x00, 0x3b } }, + { { 0, 0x0e, 0x3e }, { 1, 0x00, 0x3c } }, + { { 0, 0x0e, 0x3f }, { 1, 0x00, 0x3c } }, + { { 0, 0x0f, 0x3e }, { 1, 0x00, 0x3d } }, + { { 0, 0x10, 0x3c }, { 1, 0x00, 0x3d } }, + { { 0, 0x0f, 0x3f }, { 1, 0x00, 0x3e } }, + { { 0, 0x18, 0x2e }, { 1, 0x00, 0x3e } }, + { { 0, 0x10, 0x3e }, { 1, 0x00, 0x3f } }, + { { 0, 0x10, 0x3f }, { 1, 0x00, 0x3f } }, + { { 0, 0x11, 0x3e }, { 1, 0x01, 0x3f } }, + { { 0, 0x19, 0x2f }, { 1, 0x10, 0x30 } }, + { { 0, 0x11, 0x3f }, { 1, 0x02, 0x3f } }, + { { 0, 0x12, 0x3e }, { 1, 0x10, 0x31 } }, + { { 0, 0x12, 0x3f }, { 1, 0x03, 0x3f } }, + { { 0, 0x1b, 0x2e }, { 1, 0x10, 0x32 } }, + { { 0, 0x13, 0x3e }, { 1, 0x04, 0x3f } }, + { { 0, 0x13, 0x3f }, { 1, 0x10, 0x33 } }, + { { 0, 0x14, 0x3e }, { 1, 0x05, 0x3f } }, + { { 0, 0x1c, 0x2f }, { 1, 0x10, 0x34 } }, + { { 0, 0x14, 0x3f }, { 1, 0x06, 0x3f } }, + { { 0, 0x15, 0x3e }, { 1, 0x10, 0x35 } }, + { { 0, 0x15, 0x3f }, { 1, 0x07, 0x3f } }, + { { 0, 0x1e, 0x2e }, { 1, 0x10, 0x36 } }, + { { 0, 0x16, 0x3e }, { 1, 0x08, 0x3f } }, + { { 0, 0x16, 0x3f }, { 1, 0x10, 0x37 } }, + { { 0, 0x17, 0x3e }, { 1, 0x09, 0x3f } }, + { { 0, 0x1f, 0x2f }, { 1, 0x10, 0x38 } }, + { { 0, 0x17, 0x3f }, { 1, 0x0a, 0x3f } }, + { { 0, 0x18, 0x3e }, { 1, 0x10, 0x39 } }, + { { 0, 0x18, 0x3f }, { 1, 0x0b, 0x3f } }, + { { 0, 0x20, 0x2f }, { 1, 0x10, 0x3a } }, + { { 0, 0x19, 0x3e }, { 1, 0x0c, 0x3f } }, + { { 0, 0x19, 0x3f }, { 1, 0x10, 0x3b } }, + { { 0, 0x1a, 0x3e }, { 1, 0x0d, 0x3f } }, + { { 0, 0x20, 0x32 }, { 1, 0x10, 0x3c } }, + { { 0, 0x1a, 0x3f }, { 1, 0x0e, 0x3f } }, + { { 0, 0x1b, 0x3e }, { 1, 0x10, 0x3d } }, + { { 0, 0x1b, 0x3f }, { 1, 0x0f, 0x3f } }, + { { 0, 0x20, 0x35 }, { 1, 0x10, 0x3e } }, + { { 0, 0x1c, 0x3e }, { 1, 0x10, 0x3e } }, + { { 0, 0x1c, 0x3f }, { 1, 0x10, 0x3f } }, + { { 0, 0x1d, 0x3e }, { 1, 0x10, 0x3f } }, + { { 0, 0x20, 0x38 }, { 1, 0x11, 0x3f } }, + { { 0, 0x1d, 0x3f }, { 1, 0x11, 0x3f } }, + { { 0, 0x1e, 0x3e }, { 1, 0x12, 0x3f } }, + { { 0, 0x1e, 0x3f }, { 1, 0x12, 0x3f } }, + { { 0, 0x20, 0x3b }, { 1, 0x13, 0x3f } }, + { { 0, 0x1f, 0x3e }, { 1, 0x13, 0x3f } }, + { { 0, 0x1f, 0x3f }, { 1, 0x14, 0x3f } }, + { { 0, 0x20, 0x3d }, { 1, 0x14, 0x3f } }, + { { 0, 0x20, 0x3e }, { 1, 0x15, 0x3f } }, + { { 0, 0x20, 0x3f }, { 1, 0x15, 0x3f } }, + { { 0, 0x29, 0x2e }, { 1, 0x16, 0x3f } }, + { { 0, 0x21, 0x3e }, { 1, 0x16, 0x3f } }, + { { 0, 0x21, 0x3f }, { 1, 0x17, 0x3f } }, + { { 0, 0x22, 0x3e }, { 1, 0x17, 0x3f } }, + { { 0, 0x2a, 0x2f }, { 1, 0x18, 0x3f } }, + { { 0, 0x22, 0x3f }, { 1, 0x18, 0x3f } }, + { { 0, 0x23, 0x3e }, { 1, 0x19, 0x3f } }, + { { 0, 0x23, 0x3f }, { 1, 0x19, 0x3f } }, + { { 0, 0x2c, 0x2e }, { 1, 0x1a, 0x3f } }, + { { 0, 0x24, 0x3e }, { 1, 0x1a, 0x3f } }, + { { 0, 0x24, 0x3f }, { 1, 0x1b, 0x3f } }, + { { 0, 0x25, 0x3e }, { 1, 0x1b, 0x3f } }, + { { 0, 0x2d, 0x2f }, { 1, 0x1c, 0x3f } }, + { { 0, 0x25, 0x3f }, { 1, 0x1c, 0x3f } }, + { { 0, 0x26, 0x3e }, { 1, 0x1d, 0x3f } }, + { { 0, 0x26, 0x3f }, { 1, 0x1d, 0x3f } }, + { { 1, 0x1e, 0x3f }, { 0, 0x26, 0x3f } }, + { { 0, 0x27, 0x3e }, { 1, 0x1e, 0x3f } }, + { { 0, 0x27, 0x3f }, { 1, 0x1f, 0x3f } }, + { { 0, 0x28, 0x3e }, { 1, 0x1f, 0x3f } }, + { { 1, 0x20, 0x3f }, { 0, 0x28, 0x3e } }, + { { 0, 0x28, 0x3f }, { 1, 0x20, 0x3f } }, + { { 0, 0x29, 0x3e }, { 1, 0x21, 0x3f } }, + { { 0, 0x29, 0x3f }, { 1, 0x30, 0x30 } }, + { { 0, 0x30, 0x31 }, { 1, 0x22, 0x3f } }, + { { 0, 0x2a, 0x3e }, { 1, 0x30, 0x31 } }, + { { 0, 0x2a, 0x3f }, { 1, 0x23, 0x3f } }, + { { 0, 0x2b, 0x3e }, { 1, 0x30, 0x32 } }, + { { 0, 0x30, 0x34 }, { 1, 0x24, 0x3f } }, + { { 0, 0x2b, 0x3f }, { 1, 0x30, 0x33 } }, + { { 0, 0x2c, 0x3e }, { 1, 0x25, 0x3f } }, + { { 0, 0x2c, 0x3f }, { 1, 0x30, 0x34 } }, + { { 0, 0x30, 0x37 }, { 1, 0x26, 0x3f } }, + { { 0, 0x2d, 0x3e }, { 1, 0x30, 0x35 } }, + { { 0, 0x2d, 0x3f }, { 1, 0x27, 0x3f } }, + { { 0, 0x2e, 0x3e }, { 1, 0x30, 0x36 } }, + { { 0, 0x30, 0x3a }, { 1, 0x28, 0x3f } }, + { { 0, 0x2e, 0x3f }, { 1, 0x30, 0x37 } }, + { { 0, 0x2f, 0x3e }, { 1, 0x29, 0x3f } }, + { { 0, 0x2f, 0x3f }, { 1, 0x30, 0x38 } }, + { { 0, 0x30, 0x3d }, { 1, 0x2a, 0x3f } }, + { { 0, 0x30, 0x3e }, { 1, 0x30, 0x39 } }, + { { 1, 0x2b, 0x3f }, { 0, 0x30, 0x3e } }, + { { 0, 0x30, 0x3f }, { 1, 0x30, 0x3a } }, + { { 0, 0x31, 0x3e }, { 1, 0x2c, 0x3f } }, + { { 0, 0x31, 0x3f }, { 1, 0x30, 0x3b } }, + { { 1, 0x2d, 0x3f }, { 0, 0x31, 0x3f } }, + { { 0, 0x32, 0x3e }, { 1, 0x30, 0x3c } }, + { { 0, 0x32, 0x3f }, { 1, 0x2e, 0x3f } }, + { { 0, 0x33, 0x3e }, { 1, 0x30, 0x3d } }, + { { 1, 0x2f, 0x3f }, { 0, 0x33, 0x3e } }, + { { 0, 0x33, 0x3f }, { 1, 0x30, 0x3e } }, + { { 0, 0x34, 0x3e }, { 1, 0x30, 0x3e } }, + { { 0, 0x34, 0x3f }, { 1, 0x30, 0x3f } }, + { { 0, 0x34, 0x3f }, { 1, 0x30, 0x3f } }, + { { 0, 0x35, 0x3e }, { 1, 0x31, 0x3f } }, + { { 0, 0x35, 0x3f }, { 1, 0x31, 0x3f } }, + { { 0, 0x36, 0x3e }, { 1, 0x32, 0x3f } }, + { { 0, 0x36, 0x3e }, { 1, 0x32, 0x3f } }, + { { 0, 0x36, 0x3f }, { 1, 0x33, 0x3f } }, + { { 0, 0x37, 0x3e }, { 1, 0x33, 0x3f } }, + { { 0, 0x37, 0x3f }, { 1, 0x34, 0x3f } }, + { { 0, 0x37, 0x3f }, { 1, 0x34, 0x3f } }, + { { 0, 0x38, 0x3e }, { 1, 0x35, 0x3f } }, + { { 0, 0x38, 0x3f }, { 1, 0x35, 0x3f } }, + { { 0, 0x39, 0x3e }, { 1, 0x36, 0x3f } }, + { { 0, 0x39, 0x3e }, { 1, 0x36, 0x3f } }, + { { 0, 0x39, 0x3f }, { 1, 0x37, 0x3f } }, + { { 0, 0x3a, 0x3e }, { 1, 0x37, 0x3f } }, + { { 0, 0x3a, 0x3f }, { 1, 0x38, 0x3f } }, + { { 0, 0x3a, 0x3f }, { 1, 0x38, 0x3f } }, + { { 0, 0x3b, 0x3e }, { 1, 0x39, 0x3f } }, + { { 0, 0x3b, 0x3f }, { 1, 0x39, 0x3f } }, + { { 0, 0x3c, 0x3e }, { 1, 0x3a, 0x3f } }, + { { 0, 0x3c, 0x3e }, { 1, 0x3a, 0x3f } }, + { { 0, 0x3c, 0x3f }, { 1, 0x3b, 0x3f } }, + { { 0, 0x3d, 0x3e }, { 1, 0x3b, 0x3f } }, + { { 0, 0x3d, 0x3f }, { 1, 0x3c, 0x3f } }, + { { 0, 0x3d, 0x3f }, { 1, 0x3c, 0x3f } }, + { { 0, 0x3e, 0x3e }, { 1, 0x3d, 0x3f } }, + { { 0, 0x3e, 0x3f }, { 1, 0x3d, 0x3f } }, + { { 1, 0x3e, 0x3f }, { 0, 0x3e, 0x3f } }, + { { 0, 0x3f, 0x3f }, { 1, 0x3e, 0x3f } }, + { { 0, 0x3f, 0x3f }, { 1, 0x3f, 0x3f } } }; diff --git a/BPTCEncoder/src/BitStream.h b/BPTCEncoder/src/BitStream.h index d502ef3..3f30d6d 100755 --- a/BPTCEncoder/src/BitStream.h +++ b/BPTCEncoder/src/BitStream.h @@ -77,7 +77,7 @@ class BitStream { { } int GetBitsWritten() const { return m_BitsWritten; } - + ~BitStream() { } void WriteBitsR(unsigned int val, unsigned int nBits) { for(unsigned int i = 0; i < nBits; i++) { diff --git a/BPTCEncoder/src/RGBAEndpoints.cpp b/BPTCEncoder/src/RGBAEndpoints.cpp index cc9ba6e..8b2d5ef 100755 --- a/BPTCEncoder/src/RGBAEndpoints.cpp +++ b/BPTCEncoder/src/RGBAEndpoints.cpp @@ -89,22 +89,22 @@ static T max(const T &a, const T &b) { static const double kPi = 3.141592653589793238462643383279502884197; static const float kFloatConversion[256] = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, - 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, - 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, - 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, - 96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, - 112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f, - 128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f, - 144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f, - 160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f, - 176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f, - 192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f, - 208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f, - 224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f, - 240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, + 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, + 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, + 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, + 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, + 96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, + 112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f, + 128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f, + 144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f, + 160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f, + 176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f, + 192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f, + 208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f, + 224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f, + 240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f }; /////////////////////////////////////////////////////////////////////////////// @@ -115,41 +115,41 @@ static const float kFloatConversion[256] = { static inline uint32 CountBitsInMask(uint8 n) { #if defined(_WIN64) || defined(__x86_64__) || defined(NO_INLINE_ASSEMBLY) - if(!n) return 0; // no bits set - if(!(n & (n-1))) return 1; // power of two + if(!n) return 0; // no bits set + if(!(n & (n-1))) return 1; // power of two - uint32 c; - for(c = 0; n; c++) { - n &= n - 1; - } - return c; + uint32 c; + for(c = 0; n; c++) { + n &= n - 1; + } + return c; #else #ifdef _MSC_VER - __asm { - mov eax, 8 - movzx ecx, n - bsf ecx, ecx - sub eax, ecx + __asm { + mov eax, 8 + movzx ecx, n + bsf ecx, ecx + sub eax, ecx } #else - uint32 ans; - __asm__("movl $8, %%eax;" - "movzbl %b1, %%ecx;" - "bsf %%ecx, %%ecx;" - "subl %%ecx, %%eax;" - "movl %%eax, %0;" - : "=Q"(ans) - : "b"(n) - : "%eax", "%ecx" - ); - return ans; -#endif + uint32 ans; + __asm__("movl $8, %%eax;" + "movzbl %b1, %%ecx;" + "bsf %%ecx, %%ecx;" + "subl %%ecx, %%eax;" + "movl %%eax, %0;" + : "=Q"(ans) + : "b"(n) + : "%eax", "%ecx" + ); + return ans; +#endif #endif } template static inline void clamp(ty &x, const ty &min, const ty &max) { - x = (x < min)? min : ((x > max)? max : x); + x = (x < min)? min : ((x > max)? max : x); } // absolute distance. It turns out the compiler does a much @@ -157,23 +157,23 @@ static inline void clamp(ty &x, const ty &min, const ty &max) { // translate the values to/from registers static uint8 sad(uint8 a, uint8 b) { #if 0 - __asm - { - movzx eax, a - movzx ecx, b - sub eax, ecx - jns done - neg eax + __asm + { + movzx eax, a + movzx ecx, b + sub eax, ecx + jns done + neg eax done: - } + } #else - //const INT d = a - b; - //const INT mask = d >> 31; - //return (d ^ mask) - mask; + //const INT d = a - b; + //const INT mask = d >> 31; + //return (d ^ mask) - mask; - // return abs(a - b); + // return abs(a - b); - return (a > b)? a - b : b - a; + return (a > b)? a - b : b - a; #endif } @@ -186,55 +186,55 @@ done: uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit) { - // If the mask is all the bits, then we can just return the value. - if(mask == 0xFF) { + // If the mask is all the bits, then we can just return the value. + if(mask == 0xFF) { return val; - } + } - // Otherwise if the mask is no bits then we'll assume that they want - // all the bits ... this is only really relevant for alpha... - if(mask == 0x0) { - return 0xFF; - } + // Otherwise if the mask is no bits then we'll assume that they want + // all the bits ... this is only really relevant for alpha... + if(mask == 0x0) { + return 0xFF; + } - uint32 prec = CountBitsInMask(mask); - const uint32 step = 1 << (8 - prec); + uint32 prec = CountBitsInMask(mask); + const uint32 step = 1 << (8 - prec); - assert(step-1 == uint8(~mask)); + assert(step-1 == uint8(~mask)); - uint32 lval = val & mask; - uint32 hval = lval + step; + uint32 lval = val & mask; + uint32 hval = lval + step; - if(pBit >= 0) { - prec++; - lval |= !!(pBit) << (8 - prec); - hval |= !!(pBit) << (8 - prec); - } + if(pBit >= 0) { + prec++; + lval |= !!(pBit) << (8 - prec); + hval |= !!(pBit) << (8 - prec); + } - if(lval > val) { - lval -= step; - hval -= step; - } + if(lval > val) { + lval -= step; + hval -= step; + } - lval |= lval >> prec; - hval |= hval >> prec; + lval |= lval >> prec; + hval |= hval >> prec; - if(sad(val, lval) < sad(val, hval)) - return lval; - else - return hval; + if(sad(val, lval) < sad(val, hval)) + return lval; + else + return hval; } uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const { - const uint8 pRet0 = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit); - const uint8 pRet1 = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit); - const uint8 pRet2 = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit); - const uint8 pRet3 = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit); + const uint8 pRet0 = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit); + const uint8 pRet1 = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit); + const uint8 pRet2 = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit); + const uint8 pRet3 = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit); - const uint32 ret = pRet0 | (pRet1 << 8) | (pRet2 << 16) | (pRet3 << 24); + const uint32 ret = pRet0 | (pRet1 << 8) | (pRet2 << 16) | (pRet3 << 24); - return ret; + return ret; } /////////////////////////////////////////////////////////////////////////////// @@ -244,85 +244,85 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const { /////////////////////////////////////////////////////////////////////////////// RGBAMatrix &RGBAMatrix::operator *=(const RGBAMatrix &mat) { - *this = ((*this) * mat); - return (*this); + *this = ((*this) * mat); + return (*this); } RGBAMatrix RGBAMatrix::operator *(const RGBAMatrix &mat) const { - RGBAMatrix result; + RGBAMatrix result; - for(int i = 0; i < 4; i++) { - for(int j = 0; j < 4; j++) { + for(int i = 0; i < 4; i++) { + for(int j = 0; j < 4; j++) { - result(i, j) = 0.0f; - for(int k = 0; k < 4; k++) { - result(i, j) += m[i*4 + k] * mat.m[k*4 + j]; - } - } - } + result(i, j) = 0.0f; + for(int k = 0; k < 4; k++) { + result(i, j) += m[i*4 + k] * mat.m[k*4 + j]; + } + } + } - return result; + return result; } RGBAVector RGBAMatrix::operator *(const RGBAVector &p) const { - return RGBAVector ( - p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4, - p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8, - p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12, - p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16 - ); + return RGBAVector ( + p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4, + p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8, + p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12, + p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16 + ); } RGBAMatrix RGBAMatrix::RotateX(float rad) { - RGBAMatrix result; - result.m6 = result.m11 = cos(rad); - result.m10 = sin(rad); - result.m7 = -result.m10; - return result; + RGBAMatrix result; + result.m6 = result.m11 = cos(rad); + result.m10 = sin(rad); + result.m7 = -result.m10; + return result; } RGBAMatrix RGBAMatrix::RotateY(float rad) { - RGBAMatrix result; - result.m1 = result.m11 = cos(rad); - result.m3 = sin(rad); - result.m9 = -result.m3; - return result; + RGBAMatrix result; + result.m1 = result.m11 = cos(rad); + result.m3 = sin(rad); + result.m9 = -result.m3; + return result; } RGBAMatrix RGBAMatrix::RotateZ(float rad) { - RGBAMatrix result; - result.m1 = result.m6 = cos(rad); - result.m5 = sin(rad); - result.m2 = -result.m5; - return result; + RGBAMatrix result; + result.m1 = result.m6 = cos(rad); + result.m5 = sin(rad); + result.m2 = -result.m5; + return result; } RGBAMatrix RGBAMatrix::Translate(const RGBAVector &t) { - RGBAMatrix result; - result.m4 = t.x; - result.m8 = t.y; - result.m12 = t.z; - result.m16 = t.w; - return result; + RGBAMatrix result; + result.m4 = t.x; + result.m8 = t.y; + result.m12 = t.z; + result.m16 = t.w; + return result; } bool RGBAMatrix::Identity() { - for(int i = 0; i < 4; i++) { - for(int j = 0; j < 4; j++) { + for(int i = 0; i < 4; i++) { + for(int j = 0; j < 4; j++) { - if(i == j) { - if(fabs(m[i*4 + j] - 1.0f) > 1e-5) - return false; - } - else { - if(fabs(m[i*4 + j]) > 1e-5) - return false; - } - } - } + if(i == j) { + if(fabs(m[i*4 + j] - 1.0f) > 1e-5) + return false; + } + else { + if(fabs(m[i*4 + j]) > 1e-5) + return false; + } + } + } - return true; + return true; } /////////////////////////////////////////////////////////////////////////////// @@ -332,45 +332,45 @@ bool RGBAMatrix::Identity() { /////////////////////////////////////////////////////////////////////////////// RGBACluster::RGBACluster(const RGBACluster &left, const RGBACluster &right) { - *this = left; - for(uint32 i = 0; i < right.m_NumPoints; i++) { - const RGBAVector &p = right.m_DataPoints[i]; - AddPoint(p); - } + *this = left; + for(uint32 i = 0; i < right.m_NumPoints; i++) { + const RGBAVector &p = right.m_DataPoints[i]; + AddPoint(p); + } - m_PrincipalAxisCached = false; -} + m_PrincipalAxisCached = false; +} void RGBACluster::AddPoint(const RGBAVector &p) { - assert(m_NumPoints < kMaxNumDataPoints); - m_Total += p; - m_DataPoints[m_NumPoints++] = p; - m_PointBitString |= 1 << p.GetIdx(); + assert(m_NumPoints < kMaxNumDataPoints); + m_Total += p; + m_DataPoints[m_NumPoints++] = p; + m_PointBitString |= 1 << p.GetIdx(); - for(uint32 i = 0; i < kNumColorChannels; i++) { - m_Min.c[i] = min(p.c[i], m_Min.c[i]); - m_Max.c[i] = max(p.c[i], m_Max.c[i]); - } + for(uint32 i = 0; i < kNumColorChannels; i++) { + m_Min.c[i] = min(p.c[i], m_Min.c[i]); + m_Max.c[i] = max(p.c[i], m_Max.c[i]); + } } void RGBACluster::GetPrincipalAxis(RGBADir &axis) { - if(m_PrincipalAxisCached) { - axis = m_PrincipalAxis; - return; - } + if(m_PrincipalAxisCached) { + axis = m_PrincipalAxis; + return; + } - m_PowerMethodIterations = ::GetPrincipalAxis( - m_NumPoints, - m_DataPoints, - m_PrincipalAxis, - m_PrincipalEigenvalue, - &m_SecondEigenvalue - ); + m_PowerMethodIterations = ::GetPrincipalAxis( + m_NumPoints, + m_DataPoints, + m_PrincipalAxis, + m_PrincipalEigenvalue, + &m_SecondEigenvalue + ); - m_PrincipalAxisCached = true; + m_PrincipalAxisCached = true; - GetPrincipalAxis(axis); + GetPrincipalAxis(axis); } double RGBACluster::GetPrincipalEigenvalue() { @@ -408,74 +408,74 @@ uint32 RGBACluster::GetPowerMethodIterations() { double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2], int *indices) const { - // nBuckets should be a power of two. - assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1))); + // nBuckets should be a power of two. + assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1))); - const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1)); - - typedef uint32 tInterpPair[2]; - typedef tInterpPair tInterpLevel[16]; - const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1); + const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1)); + + typedef uint32 tInterpPair[2]; + typedef tInterpPair tInterpLevel[16]; + const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1); - assert(indexPrec >= 2 && indexPrec <= 4); + assert(indexPrec >= 2 && indexPrec <= 4); - uint32 qp1, qp2; - if(pbits) { - qp1 = p1.ToPixel(bitMask, pbits[0]); - qp2 = p2.ToPixel(bitMask, pbits[1]); - } - else { - qp1 = p1.ToPixel(bitMask); - qp2 = p2.ToPixel(bitMask); - } + uint32 qp1, qp2; + if(pbits) { + qp1 = p1.ToPixel(bitMask, pbits[0]); + qp2 = p2.ToPixel(bitMask, pbits[1]); + } + else { + qp1 = p1.ToPixel(bitMask); + qp2 = p2.ToPixel(bitMask); + } - uint8 *pqp1 = (uint8 *)&qp1; - uint8 *pqp2 = (uint8 *)&qp2; + uint8 *pqp1 = (uint8 *)&qp1; + uint8 *pqp2 = (uint8 *)&qp2; - const RGBAVector metric = errorMetricVec; + const RGBAVector metric = errorMetricVec; - float totalError = 0.0; - for(uint32 i = 0; i < m_NumPoints; i++) { + float totalError = 0.0; + for(uint32 i = 0; i < m_NumPoints; i++) { - const uint32 pixel = m_DataPoints[i].ToPixel(); - const uint8 *pb = (const uint8 *)(&pixel); + const uint32 pixel = m_DataPoints[i].ToPixel(); + const uint8 *pb = (const uint8 *)(&pixel); - float minError = FLT_MAX; - int bestBucket = -1; - for(int j = 0; j < nBuckets; j++) { + float minError = FLT_MAX; + int bestBucket = -1; + for(int j = 0; j < nBuckets; j++) { - uint32 interp0 = (*interpVals)[j][0]; - uint32 interp1 = (*interpVals)[j][1]; + uint32 interp0 = (*interpVals)[j][0]; + uint32 interp1 = (*interpVals)[j][1]; - RGBAVector errorVec (0.0f); - for(uint32 k = 0; k < kNumColorChannels; k++) { - const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF; - const uint8 dist = sad(pb[k], ip); - errorVec.c[k] = kFloatConversion[dist] * metric.c[k]; - } - - float error = errorVec * errorVec; - if(error < minError) { - minError = error; - bestBucket = j; - } + RGBAVector errorVec (0.0f); + for(uint32 k = 0; k < kNumColorChannels; k++) { + const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF; + const uint8 dist = sad(pb[k], ip); + errorVec.c[k] = kFloatConversion[dist] * metric.c[k]; + } + + float error = errorVec * errorVec; + if(error < minError) { + minError = error; + bestBucket = j; + } - // Conceptually, once the error starts growing, it doesn't stop growing (we're moving - // farther away from the reference point along the line). Hence we can early out here. - // However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer - // about 0.01 RMS error. - else if(error > minError) { - break; - } - } + // Conceptually, once the error starts growing, it doesn't stop growing (we're moving + // farther away from the reference point along the line). Hence we can early out here. + // However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer + // about 0.01 RMS error. + else if(error > minError) { + break; + } + } - totalError += minError; + totalError += minError; - assert(bestBucket >= 0); - if(indices) indices[i] = bestBucket; - } + assert(bestBucket >= 0); + if(indices) indices[i] = bestBucket; + } - return totalError; + return totalError; } /////////////////////////////////////////////////////////////////////////////// @@ -485,175 +485,174 @@ double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, u /////////////////////////////////////////////////////////////////////////////// void ClampEndpoints(RGBAVector &p1, RGBAVector &p2) { - clamp(p1.r, 0.0f, 255.0f); - clamp(p1.g, 0.0f, 255.0f); - clamp(p1.b, 0.0f, 255.0f); - clamp(p1.a, 0.0f, 255.0f); + clamp(p1.r, 0.0f, 255.0f); + clamp(p1.g, 0.0f, 255.0f); + clamp(p1.b, 0.0f, 255.0f); + clamp(p1.a, 0.0f, 255.0f); - clamp(p2.r, 0.0f, 255.0f); - clamp(p2.g, 0.0f, 255.0f); - clamp(p2.b, 0.0f, 255.0f); - clamp(p2.a, 0.0f, 255.0f); + clamp(p2.r, 0.0f, 255.0f); + clamp(p2.g, 0.0f, 255.0f); + clamp(p2.b, 0.0f, 255.0f); + clamp(p2.a, 0.0f, 255.0f); } static uint32 PowerIteration(const RGBAMatrix &mat, RGBADir &eigVec, double &eigVal) { - int numIterations = 0; - const int kMaxNumIterations = 200; + int numIterations = 0; + const int kMaxNumIterations = 200; - for(int nTries = 0; nTries < 3; nTries++) { - // !SPEED! Find eigenvectors by using the power method. This is good because the - // matrix is only 4x4, which allows us to use SIMD... + for(int nTries = 0; nTries < 3; nTries++) { + // !SPEED! Find eigenvectors by using the power method. This is good because the + // matrix is only 4x4, which allows us to use SIMD... RGBAVector b = RGBAVector(float(rand()) + 1.0f); b /= b.Length(); - bool fixed = false; - numIterations = 0; - while(!fixed && ++numIterations < kMaxNumIterations) { + bool fixed = false; + numIterations = 0; + while(!fixed && ++numIterations < kMaxNumIterations) { - RGBAVector newB = mat * b; + RGBAVector newB = mat * b; - // !HACK! If the principal eigenvector of the covariance matrix - // converges to zero, that means that the points lie equally - // spaced on a sphere in this space. In this (extremely rare) - // situation, just choose a point and use it as the principal - // direction. - const float newBlen = newB.Length(); - if(newBlen < 1e-10) { - eigVec = b; - eigVal = 0.0; - return numIterations; - } + // !HACK! If the principal eigenvector of the covariance matrix + // converges to zero, that means that the points lie equally + // spaced on a sphere in this space. In this (extremely rare) + // situation, just choose a point and use it as the principal + // direction. + const float newBlen = newB.Length(); + if(newBlen < 1e-10) { + eigVec = b; + eigVal = 0.0; + return numIterations; + } - eigVal = newB.Length(); - newB /= float(eigVal); + eigVal = newB.Length(); + newB /= float(eigVal); - if(fabs(1.0f - (b * newB)) < 1e-5) - fixed = true; + if(fabs(1.0f - (b * newB)) < 1e-5) + fixed = true; - b = newB; - } - - eigVec = b; - if(numIterations < kMaxNumIterations) { - break; - } + b = newB; } - if(numIterations == kMaxNumIterations) { - eigVal = 0.0; - } - return numIterations; + eigVec = b; + if(numIterations < kMaxNumIterations) { + break; + } + } + + if(numIterations == kMaxNumIterations) { + eigVal = 0.0; + } + return numIterations; } uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, double &eigOne, double *eigTwo) { - assert(nPts <= kMaxNumDataPoints); + assert(nPts <= kMaxNumDataPoints); - RGBAVector avg (0.0f); - for(uint32 i = 0; i < nPts; i++) { - avg += pts[i]; - } - avg /= float(nPts); + RGBAVector avg (0.0f); + for(uint32 i = 0; i < nPts; i++) { + avg += pts[i]; + } + avg /= float(nPts); - // We use these vectors for calculating the covariance matrix... - RGBAVector toPts[kMaxNumDataPoints]; - RGBAVector toPtsMax(-FLT_MAX); - for(uint32 i = 0; i < nPts; i++) { - toPts[i] = pts[i] - avg; + // We use these vectors for calculating the covariance matrix... + RGBAVector toPts[kMaxNumDataPoints]; + RGBAVector toPtsMax(-FLT_MAX); + for(uint32 i = 0; i < nPts; i++) { + toPts[i] = pts[i] - avg; - for(uint32 j = 0; j < kNumColorChannels; j++) { - toPtsMax.c[j] = max(toPtsMax.c[j], toPts[i].c[j]); - } - } + for(uint32 j = 0; j < kNumColorChannels; j++) { + toPtsMax.c[j] = max(toPtsMax.c[j], toPts[i].c[j]); + } + } - // Generate a list of unique points... - RGBAVector upts[kMaxNumDataPoints]; - uint32 uptsIdx = 0; - for(uint32 i = 0; i < nPts; i++) { - - bool hasPt = false; - for(uint32 j = 0; j < uptsIdx; j++) { - if(upts[j] == pts[i]) - hasPt = true; - } + // Generate a list of unique points... + RGBAVector upts[kMaxNumDataPoints]; + uint32 uptsIdx = 0; + for(uint32 i = 0; i < nPts; i++) { + + bool hasPt = false; + for(uint32 j = 0; j < uptsIdx; j++) { + if(upts[j] == pts[i]) + hasPt = true; + } - if(!hasPt) { - upts[uptsIdx++] = pts[i]; - } - } + if(!hasPt) { + upts[uptsIdx++] = pts[i]; + } + } - assert(uptsIdx > 0); + assert(uptsIdx > 0); - if(uptsIdx == 1) { - axis.r = axis.g = axis.b = axis.a = 0.0f; - return 0; - } - // Collinear? - else { + if(uptsIdx == 1) { + axis.r = axis.g = axis.b = axis.a = 0.0f; + return 0; - RGBADir dir (upts[1] - upts[0]); - bool collinear = true; - for(uint32 i = 2; i < nPts; i++) { - RGBAVector v = (upts[i] - upts[0]); - if(fabs(fabs(v*dir) - v.Length()) > 1e-7) { - collinear = false; - break; - } - } + // Collinear? + } else { + RGBADir dir (upts[1] - upts[0]); + bool collinear = true; + for(uint32 i = 2; i < nPts; i++) { + RGBAVector v = (upts[i] - upts[0]); + if(fabs(fabs(v*dir) - v.Length()) > 1e-7) { + collinear = false; + break; + } + } - if(collinear) { - axis = dir; - return 0; - } - } + if(collinear) { + axis = dir; + return 0; + } + } - RGBAMatrix covMatrix; + RGBAMatrix covMatrix; - // Compute covariance. - for(uint32 i = 0; i < kNumColorChannels; i++) { - for(uint32 j = 0; j <= i; j++) { + // Compute covariance. + for(uint32 i = 0; i < kNumColorChannels; i++) { + for(uint32 j = 0; j <= i; j++) { - float sum = 0.0; - for(uint32 k = 0; k < nPts; k++) { - sum += toPts[k].c[i] * toPts[k].c[j]; - } + float sum = 0.0; + for(uint32 k = 0; k < nPts; k++) { + sum += toPts[k].c[i] * toPts[k].c[j]; + } - covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1]; - covMatrix(j, i) = covMatrix(i, j); - } - } - - uint32 iters = PowerIteration(covMatrix, axis, eigOne); + covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1]; + covMatrix(j, i) = covMatrix(i, j); + } + } + + uint32 iters = PowerIteration(covMatrix, axis, eigOne); - if(NULL != eigTwo) { - if(eigOne != 0.0) { - RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix( - axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3], - axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3], - axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3], - axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3] - ); - - bool allZero = true; - for(uint32 i = 0; i < 16; i++) { - if(fabs(reduced[i]) > 0.0005) { - allZero = false; - } - } - - if(allZero) { - *eigTwo = 0.0; - } - else { - RGBADir dummyDir; - iters += PowerIteration(reduced, dummyDir, *eigTwo); - } - } - else { - *eigTwo = 0.0; - } + if(NULL != eigTwo) { + if(eigOne != 0.0) { + RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix( + axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3], + axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3], + axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3], + axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3] + ); + + bool allZero = true; + for(uint32 i = 0; i < 16; i++) { + if(fabs(reduced[i]) > 0.0005) { + allZero = false; } + } - return iters; + if(allZero) { + *eigTwo = 0.0; + } + else { + RGBADir dummyDir; + iters += PowerIteration(reduced, dummyDir, *eigTwo); + } + } + else { + *eigTwo = 0.0; + } + } + + return iters; } diff --git a/BPTCEncoder/src/RGBAEndpoints.h b/BPTCEncoder/src/RGBAEndpoints.h index 8617890..418c0d6 100755 --- a/BPTCEncoder/src/RGBAEndpoints.h +++ b/BPTCEncoder/src/RGBAEndpoints.h @@ -78,260 +78,260 @@ static const uint32 kMaxNumDataPoints = 16; class RGBAVector { public: - union { - struct { float r, g, b, a; }; - struct { float x, y, z, w; }; - float c[4]; - }; + union { + struct { float r, g, b, a; }; + struct { float x, y, z, w; }; + float c[4]; + }; - uint32 GetIdx() const { return idx; } + uint32 GetIdx() const { return idx; } - RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { } - RGBAVector(uint32 _idx, uint32 pixel) : - r(float(pixel & 0xFF)), - g(float((pixel >> 8) & 0xFF)), - b(float((pixel >> 16) & 0xFF)), - a(float((pixel >> 24) & 0xFF)), - idx(_idx) - { } + RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { } + RGBAVector(uint32 _idx, uint32 pixel) : + r(float(pixel & 0xFF)), + g(float((pixel >> 8) & 0xFF)), + b(float((pixel >> 16) & 0xFF)), + a(float((pixel >> 24) & 0xFF)), + idx(_idx) + { } - RGBAVector(float _r, float _g, float _b, float _a) : - r(_r), g(_g), b(_b), a(_a), idx(0) { } + RGBAVector(float _r, float _g, float _b, float _a) : + r(_r), g(_g), b(_b), a(_a), idx(0) { } - explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { } + explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { } - RGBAVector &operator =(const RGBAVector &other) { - this->idx = other.idx; - memcpy(c, other.c, sizeof(c)); - return (*this); - } + RGBAVector &operator =(const RGBAVector &other) { + this->idx = other.idx; + memcpy(c, other.c, sizeof(c)); + return (*this); + } - RGBAVector operator +(const RGBAVector &p) const { - return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a); - } + RGBAVector operator +(const RGBAVector &p) const { + return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a); + } - RGBAVector &operator +=(const RGBAVector &p) { - r += p.r; g += p.g; b += p.b; a += p.a; - return *this; - } + RGBAVector &operator +=(const RGBAVector &p) { + r += p.r; g += p.g; b += p.b; a += p.a; + return *this; + } - RGBAVector operator -(const RGBAVector &p) const { - return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a); - } + RGBAVector operator -(const RGBAVector &p) const { + return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a); + } - RGBAVector &operator -=(const RGBAVector &p) { - r -= p.r; g -= p.g; b -= p.b; a -= p.a; - return *this; - } + RGBAVector &operator -=(const RGBAVector &p) { + r -= p.r; g -= p.g; b -= p.b; a -= p.a; + return *this; + } - RGBAVector operator /(const float s) const { - return RGBAVector(r / s, g / s, b / s, a / s); - } + RGBAVector operator /(const float s) const { + return RGBAVector(r / s, g / s, b / s, a / s); + } - RGBAVector &operator /=(const float s) { - r /= s; g /= s; b /= s; a /= s; - return *this; - } + RGBAVector &operator /=(const float s) { + r /= s; g /= s; b /= s; a /= s; + return *this; + } - float operator *(const RGBAVector &p) const { - return r * p.r + g * p.g + b * p.b + a * p.a; - } + float operator *(const RGBAVector &p) const { + return r * p.r + g * p.g + b * p.b + a * p.a; + } - float Length() const { - return sqrt((*this) * (*this)); - } + float Length() const { + return sqrt((*this) * (*this)); + } - RGBAVector &operator *=(const RGBAVector &v) { - r *= v.r; g *= v.g; b *= v.b; a *= v.a; - return *this; - } + RGBAVector &operator *=(const RGBAVector &v) { + r *= v.r; g *= v.g; b *= v.b; a *= v.a; + return *this; + } - RGBAVector operator *(const float s) const { - return RGBAVector(r * s, g * s, b * s, a * s); - } + RGBAVector operator *(const float s) const { + return RGBAVector(r * s, g * s, b * s, a * s); + } - friend RGBAVector operator *(const float s, const RGBAVector &p) { - return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s); - } + friend RGBAVector operator *(const float s, const RGBAVector &p) { + return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s); + } - RGBAVector &operator *=(const float s) { - r *= s; g *= s; b *= s; a *= s; - return *this; - } + RGBAVector &operator *=(const float s) { + r *= s; g *= s; b *= s; a *= s; + return *this; + } - float &operator [](const int i) { - return c[i]; - } + float &operator [](const int i) { + return c[i]; + } - friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) { - const RGBAVector d = rhs - lhs; - return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7; - } + friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) { + const RGBAVector d = rhs - lhs; + return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7; + } - friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) { - return !(rhs == lhs); - } + friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) { + return !(rhs == lhs); + } - operator float *() { - return c; - } + operator float *() { + return c; + } - RGBAVector Cross(const RGBAVector &rhs) { - return RGBAVector( - rhs.y * z - y * rhs.z, - rhs.z * x - z * rhs.x, - rhs.x * y - x * rhs.y, - 1.0f - ); - } + RGBAVector Cross(const RGBAVector &rhs) { + return RGBAVector( + rhs.y * z - y * rhs.z, + rhs.z * x - z * rhs.x, + rhs.x * y - x * rhs.y, + 1.0f + ); + } - // Quantize this point. - uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const; + // Quantize this point. + uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const; private: - uint32 idx; + uint32 idx; }; class RGBAMatrix { private: - union { - float m[kNumColorChannels*kNumColorChannels]; - struct { - float m1, m2, m3, m4; - float m5, m6, m7, m8; - float m9, m10, m11, m12; - float m13, m14, m15, m16; - }; - }; + union { + float m[kNumColorChannels*kNumColorChannels]; + struct { + float m1, m2, m3, m4; + float m5, m6, m7, m8; + float m9, m10, m11, m12; + float m13, m14, m15, m16; + }; + }; - RGBAMatrix(const float *arr) { - memcpy(m, arr, sizeof(m)); - } + RGBAMatrix(const float *arr) { + memcpy(m, arr, sizeof(m)); + } public: - RGBAMatrix( - float _m1, float _m2, float _m3, float _m4, - float _m5, float _m6, float _m7, float _m8, - float _m9, float _m10, float _m11, float _m12, - float _m13, float _m14, float _m15, float _m16 - ) : - m1(_m1), m2(_m2), m3(_m3), m4(_m4), - m5(_m5), m6(_m6), m7(_m7), m8(_m8), - m9(_m9), m10(_m10), m11(_m11), m12(_m12), - m13(_m13), m14(_m14), m15(_m15), m16(_m16) - { } - - RGBAMatrix() : - m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f), - m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f), - m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f), - m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f) - { } + RGBAMatrix( + float _m1, float _m2, float _m3, float _m4, + float _m5, float _m6, float _m7, float _m8, + float _m9, float _m10, float _m11, float _m12, + float _m13, float _m14, float _m15, float _m16 + ) : + m1(_m1), m2(_m2), m3(_m3), m4(_m4), + m5(_m5), m6(_m6), m7(_m7), m8(_m8), + m9(_m9), m10(_m10), m11(_m11), m12(_m12), + m13(_m13), m14(_m14), m15(_m15), m16(_m16) + { } + + RGBAMatrix() : + m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f), + m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f), + m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f), + m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f) + { } - RGBAMatrix &operator =(const RGBAMatrix &other) { - memcpy(m, other.m, sizeof(m)); - return (*this); - } + RGBAMatrix &operator =(const RGBAMatrix &other) { + memcpy(m, other.m, sizeof(m)); + return (*this); + } - RGBAMatrix operator +(const RGBAMatrix &p) const { - float newm[kNumColorChannels*kNumColorChannels]; - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i]; - return RGBAMatrix(newm); - } + RGBAMatrix operator +(const RGBAMatrix &p) const { + float newm[kNumColorChannels*kNumColorChannels]; + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i]; + return RGBAMatrix(newm); + } - RGBAMatrix &operator +=(const RGBAMatrix &p) { - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i]; - return *this; - } + RGBAMatrix &operator +=(const RGBAMatrix &p) { + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i]; + return *this; + } - RGBAMatrix operator -(const RGBAMatrix &p) const { - float newm[kNumColorChannels*kNumColorChannels]; - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i]; - return RGBAMatrix(newm); - } + RGBAMatrix operator -(const RGBAMatrix &p) const { + float newm[kNumColorChannels*kNumColorChannels]; + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i]; + return RGBAMatrix(newm); + } - RGBAMatrix &operator -=(const RGBAMatrix &p) { - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i]; - return *this; - } + RGBAMatrix &operator -=(const RGBAMatrix &p) { + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i]; + return *this; + } - RGBAMatrix operator /(const float s) const { - float newm[kNumColorChannels*kNumColorChannels]; - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s; - return RGBAMatrix(newm); - } + RGBAMatrix operator /(const float s) const { + float newm[kNumColorChannels*kNumColorChannels]; + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s; + return RGBAMatrix(newm); + } - RGBAMatrix &operator /=(const float s) { - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s; - return *this; - } + RGBAMatrix &operator /=(const float s) { + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s; + return *this; + } - RGBAMatrix operator *(const float s) const { - float newm[kNumColorChannels*kNumColorChannels]; - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s; - return RGBAMatrix(newm); - } + RGBAMatrix operator *(const float s) const { + float newm[kNumColorChannels*kNumColorChannels]; + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s; + return RGBAMatrix(newm); + } - RGBAMatrix operator *(const double s) const { - float newm[kNumColorChannels*kNumColorChannels]; - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s); - return RGBAMatrix(newm); - } + RGBAMatrix operator *(const double s) const { + float newm[kNumColorChannels*kNumColorChannels]; + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s); + return RGBAMatrix(newm); + } - friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) { - float newm[kNumColorChannels*kNumColorChannels]; - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s; - return RGBAMatrix(newm); - } + friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) { + float newm[kNumColorChannels*kNumColorChannels]; + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s; + return RGBAMatrix(newm); + } - friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) { - float newm[kNumColorChannels*kNumColorChannels]; - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s); - return RGBAMatrix(newm); - } + friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) { + float newm[kNumColorChannels*kNumColorChannels]; + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s); + return RGBAMatrix(newm); + } - RGBAMatrix &operator *=(const float s) { - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s; - return *this; - } + RGBAMatrix &operator *=(const float s) { + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s; + return *this; + } - float &operator ()(const int i, const int j) { - return (*this)[i*4 + j]; - } + float &operator ()(const int i, const int j) { + return (*this)[i*4 + j]; + } - float &operator [](const int i) { - return m[i]; - } + float &operator [](const int i) { + return m[i]; + } - friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) { - const RGBAMatrix d = rhs - lhs; - for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) - if(d.m[i] > 1e-10) - return false; - return true; - } + friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) { + const RGBAMatrix d = rhs - lhs; + for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) + if(d.m[i] > 1e-10) + return false; + return true; + } - operator float *() { - return m; - } + operator float *() { + return m; + } - RGBAVector operator *(const RGBAVector &p) const; - RGBAMatrix operator *(const RGBAMatrix &mat) const; - RGBAMatrix &operator *=(const RGBAMatrix &mat); - static RGBAMatrix RotateX(float rad); - static RGBAMatrix RotateY(float rad); - static RGBAMatrix RotateZ(float rad); - static RGBAMatrix Translate(const RGBAVector &t); - bool Identity(); + RGBAVector operator *(const RGBAVector &p) const; + RGBAMatrix operator *(const RGBAMatrix &mat) const; + RGBAMatrix &operator *=(const RGBAMatrix &mat); + static RGBAMatrix RotateX(float rad); + static RGBAMatrix RotateY(float rad); + static RGBAMatrix RotateZ(float rad); + static RGBAMatrix Translate(const RGBAVector &t); + bool Identity(); }; class RGBADir : public RGBAVector { public: - RGBADir() : RGBAVector() { } - RGBADir(const RGBAVector &p) : RGBAVector(p) { - *this /= Length(); - } + RGBADir() : RGBAVector() { } + RGBADir(const RGBAVector &p) : RGBAVector(p) { + *this /= Length(); + } }; // Makes sure that the values of the endpoints lie between 0 and 1. @@ -340,83 +340,83 @@ extern void ClampEndpoints(RGBAVector &p1, RGBAVector &p2); class RGBACluster { public: - RGBACluster() : - m_NumPoints(0), m_Total(0), - m_PointBitString(0), - m_Min(FLT_MAX), - m_Max(-FLT_MAX), - m_PrincipalAxisCached(false) - { } + RGBACluster() : + m_NumPoints(0), m_Total(0), + m_PointBitString(0), + m_Min(FLT_MAX), + m_Max(-FLT_MAX), + m_PrincipalAxisCached(false) + { } - RGBACluster(const RGBACluster &c) : - m_NumPoints(c.m_NumPoints), - m_Total(c.m_Total), - m_PointBitString(c.m_PointBitString), - m_Min(c.m_Min), - m_Max(c.m_Max), - m_PrincipalAxisCached(c.m_PrincipalAxisCached), - m_PrincipalEigenvalue(c.m_PrincipalEigenvalue), - m_SecondEigenvalue(c.m_SecondEigenvalue), - m_PowerMethodIterations(c.m_PowerMethodIterations), - m_PrincipalAxis(c.m_PrincipalAxis) - { - memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector)); - } + RGBACluster(const RGBACluster &c) : + m_NumPoints(c.m_NumPoints), + m_Total(c.m_Total), + m_PointBitString(c.m_PointBitString), + m_Min(c.m_Min), + m_Max(c.m_Max), + m_PrincipalAxisCached(c.m_PrincipalAxisCached), + m_PrincipalEigenvalue(c.m_PrincipalEigenvalue), + m_SecondEigenvalue(c.m_SecondEigenvalue), + m_PowerMethodIterations(c.m_PowerMethodIterations), + m_PrincipalAxis(c.m_PrincipalAxis) + { + memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector)); + } - RGBACluster(const RGBACluster &left, const RGBACluster &right); - RGBACluster(const RGBAVector &p) : - m_NumPoints(1), - m_Total(p), - m_PointBitString(0), - m_Min(p), m_Max(p), - m_PrincipalAxisCached(false) - { - m_DataPoints[0] = p; - m_PointBitString |= (1 << p.GetIdx()); - } - - RGBAVector GetTotal() const { return m_Total; } - const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; } - uint32 GetNumPoints() const { return m_NumPoints; } - RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); } - const RGBAVector *GetPoints() const { return m_DataPoints; } + RGBACluster(const RGBACluster &left, const RGBACluster &right); + RGBACluster(const RGBAVector &p) : + m_NumPoints(1), + m_Total(p), + m_PointBitString(0), + m_Min(p), m_Max(p), + m_PrincipalAxisCached(false) + { + m_DataPoints[0] = p; + m_PointBitString |= (1 << p.GetIdx()); + } + + RGBAVector GetTotal() const { return m_Total; } + const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; } + uint32 GetNumPoints() const { return m_NumPoints; } + RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); } + const RGBAVector *GetPoints() const { return m_DataPoints; } - void AddPoint(const RGBAVector &p); + void AddPoint(const RGBAVector &p); - void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const { - Min = m_Min, Max = m_Max; - } + void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const { + Min = m_Min, Max = m_Max; + } - // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask. - double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const; + // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask. + double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const; - // Returns the principal axis for this point cluster. - double GetPrincipalEigenvalue(); - double GetSecondEigenvalue(); - uint32 GetPowerMethodIterations(); - void GetPrincipalAxis(RGBADir &axis); + // Returns the principal axis for this point cluster. + double GetPrincipalEigenvalue(); + double GetSecondEigenvalue(); + uint32 GetPowerMethodIterations(); + void GetPrincipalAxis(RGBADir &axis); - bool AllSamePoint() const { return m_Max == m_Min; } - int GetPointBitString() const { return m_PointBitString; } + bool AllSamePoint() const { return m_Max == m_Min; } + int GetPointBitString() const { return m_PointBitString; } private: - // The number of points in the cluster. - uint32 m_NumPoints; + // The number of points in the cluster. + uint32 m_NumPoints; - RGBAVector m_Total; + RGBAVector m_Total; - // The points in the cluster. - RGBAVector m_DataPoints[kMaxNumDataPoints]; + // The points in the cluster. + RGBAVector m_DataPoints[kMaxNumDataPoints]; - int m_PointBitString; - RGBAVector m_Min, m_Max; + int m_PointBitString; + RGBAVector m_Min, m_Max; - bool m_PrincipalAxisCached; - double m_PrincipalEigenvalue; - double m_SecondEigenvalue; - uint32 m_PowerMethodIterations; - RGBADir m_PrincipalAxis; + bool m_PrincipalAxisCached; + double m_PrincipalEigenvalue; + double m_SecondEigenvalue; + uint32 m_PowerMethodIterations; + RGBADir m_PrincipalAxis; }; extern uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit = -1); diff --git a/BPTCEncoder/src/RGBAEndpointsSIMD.cpp b/BPTCEncoder/src/RGBAEndpointsSIMD.cpp index c187326..bf71188 100755 --- a/BPTCEncoder/src/RGBAEndpointsSIMD.cpp +++ b/BPTCEncoder/src/RGBAEndpointsSIMD.cpp @@ -92,37 +92,37 @@ static inline uint32 popcnt32(uint32 x) { /* Original scalar implementation: - // If the mask is all the bits, then we can just return the value. - if(mask == 0xFF) { - return val; - } + // If the mask is all the bits, then we can just return the value. + if(mask == 0xFF) { + return val; + } - uint32 prec = CountBitsInMask(mask); - const uint32 step = 1 << (8 - prec); + uint32 prec = CountBitsInMask(mask); + const uint32 step = 1 << (8 - prec); - assert(step-1 == uint8(~mask)); + assert(step-1 == uint8(~mask)); - uint32 lval = val & mask; - uint32 hval = lval + step; + uint32 lval = val & mask; + uint32 hval = lval + step; - if(pBit >= 0) { - prec++; - lval |= !!(pBit) << (8 - prec); - hval |= !!(pBit) << (8 - prec); - } + if(pBit >= 0) { + prec++; + lval |= !!(pBit) << (8 - prec); + hval |= !!(pBit) << (8 - prec); + } - if(lval > val) { - lval -= step; - hval -= step; - } + if(lval > val) { + lval -= step; + hval -= step; + } - lval |= lval >> prec; - hval |= hval >> prec; + lval |= lval >> prec; + hval |= hval >> prec; - if(sad(val, lval) < sad(val, hval)) - return lval; - else - return hval; + if(sad(val, lval) < sad(val, hval)) + return lval; + else + return hval; */ // !TODO! AVX2 supports an instruction known as vsllv, which shifts a vector @@ -158,114 +158,114 @@ static const ALIGN_SSE uint32 kThirtyTwoVector[4] = { 32, 32, 32, 32 }; static const __m128i kByteValMask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF); static inline __m128i sad(const __m128i &a, const __m128i &b) { - const __m128i maxab = _mm_max_epu8(a, b); - const __m128i minab = _mm_min_epu8(a, b); - return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) ); + const __m128i maxab = _mm_max_epu8(a, b); + const __m128i minab = _mm_min_epu8(a, b); + return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) ); } __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const { - // !SPEED! We should figure out a way to get rid of these scalar operations. + // !SPEED! We should figure out a way to get rid of these scalar operations. #ifdef HAS_SSE_POPCNT - const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]); + const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]); #else - const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); + const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); #endif - - assert(r >= 0.0f && r <= 255.0f); - assert(g >= 0.0f && g <= 255.0f); - assert(b >= 0.0f && b <= 255.0f); - assert(a >= 0.0f && a <= 255.0f); - assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]); - assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]); + + assert(r >= 0.0f && r <= 255.0f); + assert(g >= 0.0f && g <= 255.0f); + assert(b >= 0.0f && b <= 255.0f); + assert(a >= 0.0f && a <= 255.0f); + assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]); + assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]); - const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) ); + const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) ); - const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec ); - const __m128i &mask = qmask; + const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec ); + const __m128i &mask = qmask; - __m128i lval = _mm_and_si128(val, mask); - __m128i hval = _mm_add_epi32(lval, step); + __m128i lval = _mm_and_si128(val, mask); + __m128i hval = _mm_add_epi32(lval, step); - const __m128i lvalShift = _mm_srli_epi32(lval, prec); - const __m128i hvalShift = _mm_srli_epi32(hval, prec); + const __m128i lvalShift = _mm_srli_epi32(lval, prec); + const __m128i hvalShift = _mm_srli_epi32(hval, prec); - lval = _mm_or_si128(lval, lvalShift); - hval = _mm_or_si128(hval, hvalShift); + lval = _mm_or_si128(lval, lvalShift); + hval = _mm_or_si128(hval, hvalShift); - const __m128i lvald = _mm_sub_epi32( val, lval ); - const __m128i hvald = _mm_sub_epi32( hval, val ); + const __m128i lvald = _mm_sub_epi32( val, lval ); + const __m128i hvald = _mm_sub_epi32( hval, val ); - const __m128i vd = _mm_cmplt_epi32(lvald, hvald); - __m128i ans = _mm_blendv_epi8(hval, lval, vd); + const __m128i vd = _mm_cmplt_epi32(lvald, hvald); + __m128i ans = _mm_blendv_epi8(hval, lval, vd); - const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask); - ans = _mm_blendv_epi8( ans, val, chanExact ); - return ans; + const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask); + ans = _mm_blendv_epi8( ans, val, chanExact ); + return ans; } __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const { - - // !SPEED! We should figure out a way to get rid of these scalar operations. + + // !SPEED! We should figure out a way to get rid of these scalar operations. #ifdef HAS_SSE_POPCNT - const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]); + const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]); #else - const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); + const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); #endif - - assert(r >= 0.0f && r <= 255.0f); - assert(g >= 0.0f && g <= 255.0f); - assert(b >= 0.0f && b <= 255.0f); - assert(a >= 0.0f && a <= 255.0f); - assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]); - assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]); + + assert(r >= 0.0f && r <= 255.0f); + assert(g >= 0.0f && g <= 255.0f); + assert(b >= 0.0f && b <= 255.0f); + assert(a >= 0.0f && a <= 255.0f); + assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]); + assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]); - const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) ); - const __m128i pbit = _mm_set1_epi32(!!pBit); + const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) ); + const __m128i pbit = _mm_set1_epi32(!!pBit); - const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask); - const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec ); + const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask); + const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec ); - __m128i lval = _mm_and_si128( val, mask ); - __m128i hval = _mm_add_epi32( lval, step ); + __m128i lval = _mm_and_si128( val, mask ); + __m128i hval = _mm_add_epi32( lval, step ); - const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec); - lval = _mm_or_si128(lval, pBitShifted ); - hval = _mm_or_si128(hval, pBitShifted); + const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec); + lval = _mm_or_si128(lval, pBitShifted ); + hval = _mm_or_si128(hval, pBitShifted); - // These next three lines we make sure that after adding the pbit that val is - // still in between lval and hval. If it isn't, then we subtract a - // step from both. Now, val should be larger than lval and less than - // hval, but certain situations make this not always the case (e.g. val - // is 0, precision is 4 bits, and pbit is 1). Hence, we add back the - // step if it goes below zero, making it equivalent to hval and so it - // doesn't matter which we choose. - { - __m128i cmp = _mm_cmpgt_epi32(lval, val); - cmp = _mm_mullo_epi32(cmp, step); - lval = _mm_add_epi32(lval, cmp); - hval = _mm_add_epi32(hval, cmp); + // These next three lines we make sure that after adding the pbit that val is + // still in between lval and hval. If it isn't, then we subtract a + // step from both. Now, val should be larger than lval and less than + // hval, but certain situations make this not always the case (e.g. val + // is 0, precision is 4 bits, and pbit is 1). Hence, we add back the + // step if it goes below zero, making it equivalent to hval and so it + // doesn't matter which we choose. + { + __m128i cmp = _mm_cmpgt_epi32(lval, val); + cmp = _mm_mullo_epi32(cmp, step); + lval = _mm_add_epi32(lval, cmp); + hval = _mm_add_epi32(hval, cmp); - cmp = _mm_cmplt_epi32(lval, kZeroVector); - cmp = _mm_mullo_epi32(cmp, step); - lval = _mm_sub_epi32(lval, cmp); - } + cmp = _mm_cmplt_epi32(lval, kZeroVector); + cmp = _mm_mullo_epi32(cmp, step); + lval = _mm_sub_epi32(lval, cmp); + } - const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1); - const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1); + const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1); + const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1); - lval = _mm_or_si128(lval, lvalShift); - hval = _mm_or_si128(hval, hvalShift); + lval = _mm_or_si128(lval, lvalShift); + hval = _mm_or_si128(hval, hvalShift); - const __m128i lvald = _mm_sub_epi32( val, lval ); - const __m128i hvald = _mm_sub_epi32( hval, val ); + const __m128i lvald = _mm_sub_epi32( val, lval ); + const __m128i hvald = _mm_sub_epi32( hval, val ); - const __m128i vd = _mm_cmplt_epi32(lvald, hvald); - __m128i ans = _mm_blendv_epi8(hval, lval, vd); + const __m128i vd = _mm_cmplt_epi32(lvald, hvald); + __m128i ans = _mm_blendv_epi8(hval, lval, vd); - const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask); - ans = _mm_blendv_epi8( ans, val, chanExact ); - return ans; + const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask); + ans = _mm_blendv_epi8( ans, val, chanExact ); + return ans; } /////////////////////////////////////////////////////////////////////////////// @@ -275,18 +275,18 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const { /////////////////////////////////////////////////////////////////////////////// RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const { - - __m128 xVec = _mm_set1_ps( p.x ); - __m128 yVec = _mm_set1_ps( p.y ); - __m128 zVec = _mm_set1_ps( p.z ); - __m128 wVec = _mm_set1_ps( p.w ); + + __m128 xVec = _mm_set1_ps( p.x ); + __m128 yVec = _mm_set1_ps( p.y ); + __m128 zVec = _mm_set1_ps( p.z ); + __m128 wVec = _mm_set1_ps( p.w ); - __m128 vec1 = _mm_mul_ps( xVec, col[0] ); - __m128 vec2 = _mm_mul_ps( yVec, col[1] ); - __m128 vec3 = _mm_mul_ps( zVec, col[2] ); - __m128 vec4 = _mm_mul_ps( wVec, col[3] ); + __m128 vec1 = _mm_mul_ps( xVec, col[0] ); + __m128 vec2 = _mm_mul_ps( yVec, col[1] ); + __m128 vec3 = _mm_mul_ps( zVec, col[2] ); + __m128 vec4 = _mm_mul_ps( wVec, col[3] ); - return RGBAVectorSIMD( _mm_add_ps( _mm_add_ps( vec1, vec2 ), _mm_add_ps( vec3, vec4 ) ) ); + return RGBAVectorSIMD( _mm_add_ps( _mm_add_ps( vec1, vec2 ), _mm_add_ps( vec3, vec4 ) ) ); } /////////////////////////////////////////////////////////////////////////////// @@ -297,104 +297,104 @@ RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const { RGBAClusterSIMD::RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right) { - assert(!(left.m_PointBitString & right.m_PointBitString)); + assert(!(left.m_PointBitString & right.m_PointBitString)); - *this = left; - for(int i = 0; i < right.m_NumPoints; i++) { + *this = left; + for(int i = 0; i < right.m_NumPoints; i++) { - const RGBAVectorSIMD &p = right.m_DataPoints[i]; + const RGBAVectorSIMD &p = right.m_DataPoints[i]; - assert(m_NumPoints < kMaxNumDataPoints); - m_Total += p; - m_DataPoints[m_NumPoints++] = p; + assert(m_NumPoints < kMaxNumDataPoints); + m_Total += p; + m_DataPoints[m_NumPoints++] = p; - m_Min.vec = _mm_min_ps(m_Min.vec, p.vec); - m_Max.vec = _mm_max_ps(m_Max.vec, p.vec); - } + m_Min.vec = _mm_min_ps(m_Min.vec, p.vec); + m_Max.vec = _mm_max_ps(m_Max.vec, p.vec); + } - m_PointBitString = left.m_PointBitString | right.m_PointBitString; - m_PrincipalAxisCached = false; -} + m_PointBitString = left.m_PointBitString | right.m_PointBitString; + m_PrincipalAxisCached = false; +} void RGBAClusterSIMD::AddPoint(const RGBAVectorSIMD &p, int idx) { - assert(m_NumPoints < kMaxNumDataPoints); - m_Total += p; - m_DataPoints[m_NumPoints++] = p; - m_PointBitString |= 1 << idx; + assert(m_NumPoints < kMaxNumDataPoints); + m_Total += p; + m_DataPoints[m_NumPoints++] = p; + m_PointBitString |= 1 << idx; - m_Min.vec = _mm_min_ps(m_Min.vec, p.vec); - m_Max.vec = _mm_max_ps(m_Max.vec, p.vec); + m_Min.vec = _mm_min_ps(m_Min.vec, p.vec); + m_Max.vec = _mm_max_ps(m_Max.vec, p.vec); } float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2], __m128i *indices) const { - // nBuckets should be a power of two. - assert(!(nBuckets & (nBuckets - 1))); + // nBuckets should be a power of two. + assert(!(nBuckets & (nBuckets - 1))); #ifdef HAS_SSE_POPCNT - const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF); + const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF); #else - const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF); + const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF); #endif - assert(indexPrec >= 2 && indexPrec <= 4); + assert(indexPrec >= 2 && indexPrec <= 4); - typedef __m128i tInterpPair[2]; - typedef tInterpPair tInterpLevel[16]; - const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1); + typedef __m128i tInterpPair[2]; + typedef tInterpPair tInterpLevel[16]; + const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1); - __m128i qp1, qp2; - if(pbits) { - qp1 = p1.ToPixel(bitMask, pbits[0]); - qp2 = p2.ToPixel(bitMask, pbits[1]); - } - else { - qp1 = p1.ToPixel(bitMask); - qp2 = p2.ToPixel(bitMask); - } + __m128i qp1, qp2; + if(pbits) { + qp1 = p1.ToPixel(bitMask, pbits[0]); + qp2 = p2.ToPixel(bitMask, pbits[1]); + } + else { + qp1 = p1.ToPixel(bitMask); + qp2 = p2.ToPixel(bitMask); + } - __m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() ); + __m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() ); - __m128 totalError = kZero; - for(int i = 0; i < m_NumPoints; i++) { + __m128 totalError = kZero; + for(int i = 0; i < m_NumPoints; i++) { - const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask ); + const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask ); - __m128 minError = _mm_set1_ps(FLT_MAX); - __m128i bestBucket = _mm_set1_epi32(-1); - for(int j = 0; j < nBuckets; j++) { + __m128 minError = _mm_set1_ps(FLT_MAX); + __m128i bestBucket = _mm_set1_epi32(-1); + for(int j = 0; j < nBuckets; j++) { - const __m128i jVec = _mm_set1_epi32(j); - const __m128i interp0 = (*interpVals)[j][0]; - const __m128i interp1 = (*interpVals)[j][1]; + const __m128i jVec = _mm_set1_epi32(j); + const __m128i interp0 = (*interpVals)[j][0]; + const __m128i interp1 = (*interpVals)[j][1]; - const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 ); - const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 ); - const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) ); - const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel ); - __m128 errorVec = _mm_cvtepi32_ps( dist ); - - errorVec = _mm_mul_ps( errorVec, errorMetricVec ); - errorVec = _mm_mul_ps( errorVec, errorVec ); - errorVec = _mm_hadd_ps( errorVec, errorVec ); - errorVec = _mm_hadd_ps( errorVec, errorVec ); + const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 ); + const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 ); + const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) ); + const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel ); + __m128 errorVec = _mm_cvtepi32_ps( dist ); + + errorVec = _mm_mul_ps( errorVec, errorMetricVec ); + errorVec = _mm_mul_ps( errorVec, errorVec ); + errorVec = _mm_hadd_ps( errorVec, errorVec ); + errorVec = _mm_hadd_ps( errorVec, errorVec ); - const __m128 cmp = _mm_cmple_ps( errorVec, minError ); - minError = _mm_blendv_ps( minError, errorVec, cmp ); - bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) ); + const __m128 cmp = _mm_cmple_ps( errorVec, minError ); + minError = _mm_blendv_ps( minError, errorVec, cmp ); + bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) ); - // Conceptually, once the error starts growing, it doesn't stop growing (we're moving - // farther away from the reference point along the line). Hence we can early out here. - // However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer - // about 0.01 RMS error. - if(!((uint8 *)(&cmp))[0]) - break; - } + // Conceptually, once the error starts growing, it doesn't stop growing (we're moving + // farther away from the reference point along the line). Hence we can early out here. + // However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer + // about 0.01 RMS error. + if(!((uint8 *)(&cmp))[0]) + break; + } - totalError = _mm_add_ps(totalError, minError); - if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0]; - } + totalError = _mm_add_ps(totalError, minError); + if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0]; + } - return ((float *)(&totalError))[0]; + return ((float *)(&totalError))[0]; } /////////////////////////////////////////////////////////////////////////////// @@ -404,69 +404,69 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector /////////////////////////////////////////////////////////////////////////////// void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2) { - p1.vec = _mm_min_ps( kByteMax, _mm_max_ps( p1.vec, kZero ) ); - p2.vec = _mm_min_ps( kByteMax, _mm_max_ps( p2.vec, kZero ) ); + p1.vec = _mm_min_ps( kByteMax, _mm_max_ps( p1.vec, kZero ) ); + p2.vec = _mm_min_ps( kByteMax, _mm_max_ps( p2.vec, kZero ) ); } void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis) { - if(c.GetNumPoints() == 2) { - axis = c.GetPoint(1) - c.GetPoint(0); - return; - } + if(c.GetNumPoints() == 2) { + axis = c.GetPoint(1) - c.GetPoint(0); + return; + } - RGBAVectorSIMD avg = c.GetTotal(); - avg /= float(c.GetNumPoints()); + RGBAVectorSIMD avg = c.GetTotal(); + avg /= float(c.GetNumPoints()); - // We use these vectors for calculating the covariance matrix... - RGBAVectorSIMD toPts[kMaxNumDataPoints]; - RGBAVectorSIMD toPtsMax(-FLT_MAX); - for(int i = 0; i < c.GetNumPoints(); i++) { - toPts[i] = c.GetPoint(i) - avg; - toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec); - } + // We use these vectors for calculating the covariance matrix... + RGBAVectorSIMD toPts[kMaxNumDataPoints]; + RGBAVectorSIMD toPtsMax(-FLT_MAX); + for(int i = 0; i < c.GetNumPoints(); i++) { + toPts[i] = c.GetPoint(i) - avg; + toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec); + } - RGBAMatrixSIMD covMatrix; + RGBAMatrixSIMD covMatrix; - // Compute covariance. - const float fNumPoints = float(c.GetNumPoints()); - for(int i = 0; i < kNumColorChannels; i++) { - for(int j = 0; j <= i; j++) { + // Compute covariance. + const float fNumPoints = float(c.GetNumPoints()); + for(int i = 0; i < kNumColorChannels; i++) { + for(int j = 0; j <= i; j++) { - float sum = 0.0; - for(int k = 0; k < c.GetNumPoints(); k++) { - sum += toPts[k].c[i] * toPts[k].c[j]; - } + float sum = 0.0; + for(int k = 0; k < c.GetNumPoints(); k++) { + sum += toPts[k].c[i] * toPts[k].c[j]; + } - covMatrix(i, j) = sum / fNumPoints; - covMatrix(j, i) = covMatrix(i, j); - } - } + covMatrix(i, j) = sum / fNumPoints; + covMatrix(j, i) = covMatrix(i, j); + } + } - // !SPEED! Find eigenvectors by using the power method. This is good because the - // matrix is only 4x4, which allows us to use SIMD... - RGBAVectorSIMD b = toPtsMax; - assert(b.Length() > 0); - b /= b.Length(); + // !SPEED! Find eigenvectors by using the power method. This is good because the + // matrix is only 4x4, which allows us to use SIMD... + RGBAVectorSIMD b = toPtsMax; + assert(b.Length() > 0); + b /= b.Length(); - RGBAVectorSIMD newB = covMatrix * b; + RGBAVectorSIMD newB = covMatrix * b; - // !HACK! If the principal eigenvector of the covariance matrix - // converges to zero, that means that the points lie equally - // spaced on a sphere in this space. In this (extremely rare) - // situation, just choose a point and use it as the principal - // direction. - const float newBlen = newB.Length(); - if(newBlen < 1e-10) { - axis = toPts[0]; - return; - } + // !HACK! If the principal eigenvector of the covariance matrix + // converges to zero, that means that the points lie equally + // spaced on a sphere in this space. In this (extremely rare) + // situation, just choose a point and use it as the principal + // direction. + const float newBlen = newB.Length(); + if(newBlen < 1e-10) { + axis = toPts[0]; + return; + } - for(int i = 0; i < 8; i++) { - newB = covMatrix * b; - newB.Normalize(); - b = newB; - } + for(int i = 0; i < 8; i++) { + newB = covMatrix * b; + newB.Normalize(); + b = newB; + } - axis = b; + axis = b; } diff --git a/BPTCEncoder/src/RGBAEndpointsSIMD.h b/BPTCEncoder/src/RGBAEndpointsSIMD.h index a83176f..6288c7c 100755 --- a/BPTCEncoder/src/RGBAEndpointsSIMD.h +++ b/BPTCEncoder/src/RGBAEndpointsSIMD.h @@ -81,270 +81,270 @@ static const __m128 kEpsilonSIMD = _mm_set1_ps(1e-8f); class RGBAVectorSIMD { public: - union { - struct { float r, g, b, a; }; - struct { float x, y, z, w; }; - float c[4]; - __m128 vec; - }; + union { + struct { float r, g, b, a; }; + struct { float x, y, z, w; }; + float c[4]; + __m128 vec; + }; - RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { } - RGBAVectorSIMD(uint32 pixel) : - r(float(pixel & 0xFF)), - g(float((pixel >> 8) & 0xFF)), - b(float((pixel >> 16) & 0xFF)), - a(float((pixel >> 24) & 0xFF)) - { } + RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { } + RGBAVectorSIMD(uint32 pixel) : + r(float(pixel & 0xFF)), + g(float((pixel >> 8) & 0xFF)), + b(float((pixel >> 16) & 0xFF)), + a(float((pixel >> 24) & 0xFF)) + { } - explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) : - r(_r), g(_g), b(_b), a(_a) { } + explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) : + r(_r), g(_g), b(_b), a(_a) { } - explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { } + explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { } - RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { } - RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { } + RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { } + RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { } - RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const { - return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) ); - } + RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const { + return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) ); + } - RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) { - this->vec = _mm_add_ps(this->vec, p.vec); - return *this; - } + RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) { + this->vec = _mm_add_ps(this->vec, p.vec); + return *this; + } - RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const { - return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) ); - } + RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const { + return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) ); + } - RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) { - this->vec = _mm_sub_ps(this->vec, p.vec); - return *this; - } + RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) { + this->vec = _mm_sub_ps(this->vec, p.vec); + return *this; + } - RGBAVectorSIMD operator /(const float s) const { - return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) ); - } + RGBAVectorSIMD operator /(const float s) const { + return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) ); + } - RGBAVectorSIMD &operator /=(const float s) { - this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) ); - return *this; - } + RGBAVectorSIMD &operator /=(const float s) { + this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) ); + return *this; + } - float operator *(const RGBAVectorSIMD &p) const { - __m128 mul = _mm_mul_ps(this->vec, p.vec); - mul = _mm_hadd_ps(mul, mul); - mul = _mm_hadd_ps(mul, mul); - return ((float *)(&mul))[0]; - } + float operator *(const RGBAVectorSIMD &p) const { + __m128 mul = _mm_mul_ps(this->vec, p.vec); + mul = _mm_hadd_ps(mul, mul); + mul = _mm_hadd_ps(mul, mul); + return ((float *)(&mul))[0]; + } - void Normalize() { - __m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) ); - vec = _mm_mul_ps( vec, rsqrt ); - } + void Normalize() { + __m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) ); + vec = _mm_mul_ps( vec, rsqrt ); + } - float Length() const { - return sqrt((*this) * (*this)); - } + float Length() const { + return sqrt((*this) * (*this)); + } - RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) { - this->vec = _mm_mul_ps(this->vec, v.vec); - return *this; - } + RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) { + this->vec = _mm_mul_ps(this->vec, v.vec); + return *this; + } - RGBAVectorSIMD operator *(const float s) const { - return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) ); - } + RGBAVectorSIMD operator *(const float s) const { + return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) ); + } - friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) { - return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) ); - } + friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) { + return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) ); + } - RGBAVectorSIMD &operator *=(const float s) { - this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) ); - return *this; - } + RGBAVectorSIMD &operator *=(const float s) { + this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) ); + return *this; + } - float &operator [](const int i) { - return c[i]; - } + float &operator [](const int i) { + return c[i]; + } - friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) { - __m128 d = _mm_sub_ps(rhs.vec, lhs.vec); - d = _mm_mul_ps(d, d); - __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD); - cmp = _mm_hadd_ps(cmp, cmp); - cmp = _mm_hadd_ps(cmp, cmp); - return ((float *)(&cmp))[0] == 0.0f; - } + friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) { + __m128 d = _mm_sub_ps(rhs.vec, lhs.vec); + d = _mm_mul_ps(d, d); + __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD); + cmp = _mm_hadd_ps(cmp, cmp); + cmp = _mm_hadd_ps(cmp, cmp); + return ((float *)(&cmp))[0] == 0.0f; + } - friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) { - return !(rhs == lhs); - } + friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) { + return !(rhs == lhs); + } - operator float *() { - return c; - } + operator float *() { + return c; + } - // Quantize this point. - __m128i ToPixel(const __m128i &channelMask, const int pBit) const; - __m128i ToPixel(const __m128i &channelMask) const; + // Quantize this point. + __m128i ToPixel(const __m128i &channelMask, const int pBit) const; + __m128i ToPixel(const __m128i &channelMask) const; }; class RGBAMatrixSIMD { private: - union { - float m[kNumColorChannels*kNumColorChannels]; - struct { - float m1, m5, m9, m13; - float m2, m6, m10, m14; - float m3, m7, m11, m15; - float m4, m8, m12, m16; - }; - __m128 col[kNumColorChannels]; - }; + union { + float m[kNumColorChannels*kNumColorChannels]; + struct { + float m1, m5, m9, m13; + float m2, m6, m10, m14; + float m3, m7, m11, m15; + float m4, m8, m12, m16; + }; + __m128 col[kNumColorChannels]; + }; - RGBAMatrixSIMD(const float *arr) { - memcpy(m, arr, sizeof(m)); - } + RGBAMatrixSIMD(const float *arr) { + memcpy(m, arr, sizeof(m)); + } - RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) { - for(int i = 0; i < kNumColorChannels; i++) - col[i] = newcol[i]; - } + RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) { + for(int i = 0; i < kNumColorChannels; i++) + col[i] = newcol[i]; + } public: - - RGBAMatrixSIMD() : - m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f), - m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f), - m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f), - m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f) - { } + + RGBAMatrixSIMD() : + m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f), + m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f), + m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f), + m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f) + { } - RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) { - memcpy(m, other.m, sizeof(m)); - return (*this); - } + RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) { + memcpy(m, other.m, sizeof(m)); + return (*this); + } - RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const { - RGBAMatrixSIMD newm; - for(int i = 0; i < kNumColorChannels; i++) { - newm.col[i] = _mm_add_ps(col[i], p.col[i]); - } - return newm; - } + RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const { + RGBAMatrixSIMD newm; + for(int i = 0; i < kNumColorChannels; i++) { + newm.col[i] = _mm_add_ps(col[i], p.col[i]); + } + return newm; + } - RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) { - for(int i = 0; i < kNumColorChannels; i++) { - col[i] = _mm_add_ps( col[i], p.col[i] ); - } - return *this; - } + RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) { + for(int i = 0; i < kNumColorChannels; i++) { + col[i] = _mm_add_ps( col[i], p.col[i] ); + } + return *this; + } - RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const { - RGBAMatrixSIMD newm; - for(int i = 0; i < kNumColorChannels; i++) { - newm.col[i] = _mm_sub_ps( col[i], p.col[i] ); - } - return newm; - } + RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const { + RGBAMatrixSIMD newm; + for(int i = 0; i < kNumColorChannels; i++) { + newm.col[i] = _mm_sub_ps( col[i], p.col[i] ); + } + return newm; + } - RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) { - for(int i = 0; i < kNumColorChannels; i++) { - col[i] = _mm_sub_ps( col[i], p.col[i] ); - } - return *this; - } + RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) { + for(int i = 0; i < kNumColorChannels; i++) { + col[i] = _mm_sub_ps( col[i], p.col[i] ); + } + return *this; + } - RGBAMatrixSIMD operator /(const float s) const { - __m128 f = _mm_set1_ps(s); - RGBAMatrixSIMD newm; + RGBAMatrixSIMD operator /(const float s) const { + __m128 f = _mm_set1_ps(s); + RGBAMatrixSIMD newm; - for(int i = 0; i < kNumColorChannels; i++) { - newm.col[i] = _mm_div_ps( col[i], f ); - } + for(int i = 0; i < kNumColorChannels; i++) { + newm.col[i] = _mm_div_ps( col[i], f ); + } - return newm; - } + return newm; + } - RGBAMatrixSIMD &operator /=(const float s) { + RGBAMatrixSIMD &operator /=(const float s) { - __m128 f = _mm_set1_ps(s); + __m128 f = _mm_set1_ps(s); - for(int i = 0; i < kNumColorChannels; i++) { - col[i] = _mm_div_ps(col[i], f); - } + for(int i = 0; i < kNumColorChannels; i++) { + col[i] = _mm_div_ps(col[i], f); + } - return *this; - } + return *this; + } - RGBAMatrixSIMD operator *(const float s) const { - __m128 f = _mm_set1_ps(s); + RGBAMatrixSIMD operator *(const float s) const { + __m128 f = _mm_set1_ps(s); - RGBAMatrixSIMD newm; - for(int i = 0; i < kNumColorChannels; i++) { - newm.col[i] = _mm_mul_ps( col[i], f ); - } - return newm; - } + RGBAMatrixSIMD newm; + for(int i = 0; i < kNumColorChannels; i++) { + newm.col[i] = _mm_mul_ps( col[i], f ); + } + return newm; + } - friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) { - __m128 f = _mm_set1_ps(s); - RGBAMatrixSIMD newm; + friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) { + __m128 f = _mm_set1_ps(s); + RGBAMatrixSIMD newm; - for(int i = 0; i < kNumColorChannels; i++) { - newm.col[i] = _mm_mul_ps( p.col[i], f ); - } - return newm; - } + for(int i = 0; i < kNumColorChannels; i++) { + newm.col[i] = _mm_mul_ps( p.col[i], f ); + } + return newm; + } - RGBAMatrixSIMD &operator *=(const float s) { - __m128 f = _mm_set1_ps(s); - for(int i = 0; i < kNumColorChannels; i++) - col[i] = _mm_mul_ps(col[i], f); - return *this; - } + RGBAMatrixSIMD &operator *=(const float s) { + __m128 f = _mm_set1_ps(s); + for(int i = 0; i < kNumColorChannels; i++) + col[i] = _mm_mul_ps(col[i], f); + return *this; + } - float &operator ()(const int i, const int j) { - return (*this)[j*4 + i]; - } + float &operator ()(const int i, const int j) { + return (*this)[j*4 + i]; + } - float &operator [](const int i) { - return m[i]; - } + float &operator [](const int i) { + return m[i]; + } - friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) { - - __m128 sum = _mm_set1_ps(0.0f); - for(int i = 0; i < kNumColorChannels; i++) { - __m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]); - d = _mm_mul_ps(d, d); - __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD); - cmp = _mm_hadd_ps(cmp, cmp); - cmp = _mm_hadd_ps(cmp, cmp); - sum = _mm_add_ps(sum, cmp); - } + friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) { + + __m128 sum = _mm_set1_ps(0.0f); + for(int i = 0; i < kNumColorChannels; i++) { + __m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]); + d = _mm_mul_ps(d, d); + __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD); + cmp = _mm_hadd_ps(cmp, cmp); + cmp = _mm_hadd_ps(cmp, cmp); + sum = _mm_add_ps(sum, cmp); + } - if(((float *)(&sum))[0] != 0) - return false; - else - return true; - } + if(((float *)(&sum))[0] != 0) + return false; + else + return true; + } - operator float *() { - return m; - } + operator float *() { + return m; + } - RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const; + RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const; }; class RGBADirSIMD : public RGBAVectorSIMD { public: - RGBADirSIMD() : RGBAVectorSIMD() { } - RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) { - this->Normalize(); - } + RGBADirSIMD() : RGBAVectorSIMD() { } + RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) { + this->Normalize(); + } }; // Makes sure that the values of the endpoints lie between 0 and 1. @@ -353,69 +353,69 @@ extern void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2); class RGBAClusterSIMD { public: - RGBAClusterSIMD() : - m_NumPoints(0), m_Total(0.0f), - m_PointBitString(0), - m_Min(FLT_MAX), - m_Max(-FLT_MAX), - m_PrincipalAxisCached(false) - { } + RGBAClusterSIMD() : + m_NumPoints(0), m_Total(0.0f), + m_PointBitString(0), + m_Min(FLT_MAX), + m_Max(-FLT_MAX), + m_PrincipalAxisCached(false) + { } - RGBAClusterSIMD(const RGBAClusterSIMD &c) : - m_NumPoints(c.m_NumPoints), - m_Total(c.m_Total), - m_PointBitString(c.m_PointBitString), - m_Min(c.m_Min), - m_Max(c.m_Max), - m_PrincipalAxisCached(false) - { - memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD)); - } + RGBAClusterSIMD(const RGBAClusterSIMD &c) : + m_NumPoints(c.m_NumPoints), + m_Total(c.m_Total), + m_PointBitString(c.m_PointBitString), + m_Min(c.m_Min), + m_Max(c.m_Max), + m_PrincipalAxisCached(false) + { + memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD)); + } - RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right); - RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) : - m_NumPoints(1), - m_Total(p), - m_PointBitString(0), - m_Min(p), m_Max(p), - m_PrincipalAxisCached(false) - { - m_DataPoints[0] = p; - m_PointBitString |= (1 << idx); - } - - RGBAVectorSIMD GetTotal() const { return m_Total; } - const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; } - int GetNumPoints() const { return m_NumPoints; } - RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); } + RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right); + RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) : + m_NumPoints(1), + m_Total(p), + m_PointBitString(0), + m_Min(p), m_Max(p), + m_PrincipalAxisCached(false) + { + m_DataPoints[0] = p; + m_PointBitString |= (1 << idx); + } + + RGBAVectorSIMD GetTotal() const { return m_Total; } + const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; } + int GetNumPoints() const { return m_NumPoints; } + RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); } - void AddPoint(const RGBAVectorSIMD &p, int idx); + void AddPoint(const RGBAVectorSIMD &p, int idx); - void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const { - Min = m_Min, Max = m_Max; - } + void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const { + Min = m_Min, Max = m_Max; + } - // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask. - float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const; + // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask. + float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const; - bool AllSamePoint() const { return m_Max == m_Min; } - int GetPointBitString() const { return m_PointBitString; } + bool AllSamePoint() const { return m_Max == m_Min; } + int GetPointBitString() const { return m_PointBitString; } private: - // The number of points in the cluster. - int m_NumPoints; + // The number of points in the cluster. + int m_NumPoints; - RGBAVectorSIMD m_Total; + RGBAVectorSIMD m_Total; - // The points in the cluster. - RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints]; + // The points in the cluster. + RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints]; - RGBAVectorSIMD m_Min, m_Max; - int m_PointBitString; + RGBAVectorSIMD m_Min, m_Max; + int m_PointBitString; - RGBADirSIMD m_PrincipalAxis; - bool m_PrincipalAxisCached; + RGBADirSIMD m_PrincipalAxis; + bool m_PrincipalAxisCached; }; extern void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis); diff --git a/CLTool/src/clwin32.cpp b/CLTool/src/clwin32.cpp index 62b51c4..d746a22 100644 --- a/CLTool/src/clwin32.cpp +++ b/CLTool/src/clwin32.cpp @@ -64,18 +64,18 @@ void PrintUsage() { } void ExtractBasename(const char *filename, char *buf, uint32 bufSz) { - size_t len = strlen(filename); - const char *end = filename + len; - while(--end != filename) { - if(*end == '.') - { - uint32 numChars = int32(end - filename + 1); - uint32 toCopy = (numChars > bufSz)? bufSz : numChars; - memcpy(buf, filename, toCopy); - buf[toCopy - 1] = '\0'; - return; - } - } + size_t len = strlen(filename); + const char *end = filename + len; + while(--end != filename) { + if(*end == '.') + { + uint32 numChars = int32(end - filename + 1); + uint32 toCopy = (numChars > bufSz)? bufSz : numChars; + memcpy(buf, filename, toCopy); + buf[toCopy - 1] = '\0'; + return; + } + } } int _tmain(int argc, _TCHAR* argv[]) @@ -175,7 +175,7 @@ int _tmain(int argc, _TCHAR* argv[]) if(numThreads > 1 && bSaveLog) { bSaveLog = false; fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n" - "If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n"); + "If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n"); } if(fileArg == argc) { @@ -183,16 +183,16 @@ int _tmain(int argc, _TCHAR* argv[]) exit(1); } - char basename[256]; - ExtractBasename(argv[fileArg], basename, 256); + char basename[256]; + ExtractBasename(argv[fileArg], basename, 256); ImageFile file (argv[fileArg]); - if(!file.Load()) { + if(!file.Load()) { fprintf(stderr, "Error loading file: %s\n", argv[fileArg]); return 1; - } + } - const Image *img = file.GetImage(); + const Image *img = file.GetImage(); int numBlocks = (img->GetWidth() * img->GetHeight())/16; BlockStatManager *statManager = NULL; @@ -224,14 +224,14 @@ int _tmain(int argc, _TCHAR* argv[]) } if(bSaveLog) { - strcat_s(basename, ".log"); + strcat_s(basename, ".log"); statManager->ToFile(basename); - basename[strlen(basename) - 4] = '\0'; + basename[strlen(basename) - 4] = '\0'; } strcat_s(basename, "-bc7.png"); - Image cImg (*ci); - ImageFile cImgFile (basename, eFileFormat_PNG, cImg); - cImgFile.Write(); + Image cImg (*ci); + ImageFile cImgFile (basename, eFileFormat_PNG, cImg); + cImgFile.Write(); // Cleanup delete ci; diff --git a/Core/include/Image.h b/Core/include/Image.h index 3de33b9..7086916 100644 --- a/Core/include/Image.h +++ b/Core/include/Image.h @@ -54,7 +54,7 @@ class ImageLoader; class Image { public: - Image(const CompressedImage &); + Image(const CompressedImage &); Image(const ImageLoader &); ~Image(); diff --git a/Core/src/BlockStats.cpp b/Core/src/BlockStats.cpp index 2bd93dd..89de020 100644 --- a/Core/src/BlockStats.cpp +++ b/Core/src/BlockStats.cpp @@ -165,8 +165,8 @@ BlockStatManager::~BlockStatManager() { if(m_Mutex) { - delete m_Mutex; - m_Mutex = 0; + delete m_Mutex; + m_Mutex = 0; } } @@ -206,15 +206,15 @@ void BlockStatManager::ToFile(const CHAR *filename) { CHAR str[256]; #ifdef _MSC_VER - _sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr); + _sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr); #else snprintf(str, 256, "%d,%s\n", i, statStr); #endif uint32 strLen = uint32(strlen(str)); if(strLen > 255) { - str[255] = '\n'; - strLen = 256; + str[255] = '\n'; + strLen = 256; } fstr.Write((uint8 *)str, strLen); diff --git a/Core/src/CompressedImage.cpp b/Core/src/CompressedImage.cpp index 7250840..0f4f393 100644 --- a/Core/src/CompressedImage.cpp +++ b/Core/src/CompressedImage.cpp @@ -70,16 +70,16 @@ CompressedImage::CompressedImage( const CompressedImage &other ) } CompressedImage::CompressedImage( - const unsigned int width, + const unsigned int width, const unsigned int height, const ECompressionFormat format, const unsigned char *data ) -: m_Width(width) -, m_Height(height) -, m_Format(format) -, m_Data(0) -, m_DataSz(0) + : m_Width(width) + , m_Height(height) + , m_Format(format) + , m_Data(0) + , m_DataSz(0) { InitData(data); } @@ -94,7 +94,7 @@ void CompressedImage::InitData(const unsigned char *withData) { case eCompressionFormat_DXT5: m_DataSz = uncompDataSz / 4; break; case eCompressionFormat_BPTC: m_DataSz = uncompDataSz / 4; break; } - + if(m_DataSz > 0) { m_Data = new unsigned char[m_DataSz]; memcpy(m_Data, withData, m_DataSz); diff --git a/Core/src/Image.cpp b/Core/src/Image.cpp index b0e67f6..f507186 100644 --- a/Core/src/Image.cpp +++ b/Core/src/Image.cpp @@ -95,14 +95,14 @@ Image::Image(const CompressedImage &ci) : m_Width(ci.GetWidth()) , m_Height(ci.GetHeight()) { - unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4; - m_PixelData = new uint8[ bufSz ]; - if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; } + unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4; + m_PixelData = new uint8[ bufSz ]; + if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; } - if(!ci.DecompressImage(m_PixelData, bufSz)) { - fprintf(stderr, "Error decompressing image!\n"); - return; - } + if(!ci.DecompressImage(m_PixelData, bufSz)) { + fprintf(stderr, "Error decompressing image!\n"); + return; + } } Image::Image(const ImageLoader &loader) diff --git a/Core/src/StopWatch.h b/Core/src/StopWatch.h index 53d939d..a31dd73 100755 --- a/Core/src/StopWatch.h +++ b/Core/src/StopWatch.h @@ -74,23 +74,23 @@ class StopWatchImpl; class StopWatch { public: - StopWatch(); - StopWatch(const StopWatch &); + StopWatch(); + StopWatch(const StopWatch &); - ~StopWatch(); + ~StopWatch(); - StopWatch &operator=(const StopWatch &); + StopWatch &operator=(const StopWatch &); - void Start(); - void Stop(); - void Reset(); + void Start(); + void Stop(); + void Reset(); - double TimeInSeconds() const; - double TimeInMilliseconds() const; - double TimeInMicroseconds() const; + double TimeInSeconds() const; + double TimeInMilliseconds() const; + double TimeInMicroseconds() const; private: - StopWatchImpl *impl; + StopWatchImpl *impl; }; #endif // __TEXCOMP_STOP_WATCH_H__ diff --git a/Core/src/StopWatchOSX.cpp b/Core/src/StopWatchOSX.cpp index 50f6363..063828a 100644 --- a/Core/src/StopWatchOSX.cpp +++ b/Core/src/StopWatchOSX.cpp @@ -101,7 +101,7 @@ double StopWatch::TimeInSeconds() const { double StopWatch::TimeInMilliseconds() const { return double(impl->duration) / 1e3; } - + double StopWatch::TimeInMicroseconds() const { return double(impl->duration); } diff --git a/Core/src/StopWatchUnix.cpp b/Core/src/StopWatchUnix.cpp index 3694b68..4b17f6b 100644 --- a/Core/src/StopWatchUnix.cpp +++ b/Core/src/StopWatchUnix.cpp @@ -100,7 +100,7 @@ double StopWatch::TimeInSeconds() const { double StopWatch::TimeInMilliseconds() const { return impl->duration * 1000; } - + double StopWatch::TimeInMicroseconds() const { return impl->duration * 1000000; } diff --git a/Core/src/TexComp.cpp b/Core/src/TexComp.cpp index 1621fb5..3cf0bf5 100644 --- a/Core/src/TexComp.cpp +++ b/Core/src/TexComp.cpp @@ -404,5 +404,5 @@ bool CompressImageData( } void YieldThread() { - TCThread::Yield(); + TCThread::Yield(); } diff --git a/Core/src/ThreadGroup.cpp b/Core/src/ThreadGroup.cpp index 33a58eb..49be9f5 100644 --- a/Core/src/ThreadGroup.cpp +++ b/Core/src/ThreadGroup.cpp @@ -115,7 +115,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i , m_CompressedBlockSize( (func == BC7C::Compress #ifdef HAS_SSE_41 - || func == BC7C::CompressImageBC7SIMD + || func == BC7C::CompressImageBC7SIMD #endif )? 16 @@ -125,7 +125,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i , m_UncompressedBlockSize( (func == BC7C::Compress #ifdef HAS_SSE_41 - || func == BC7C::CompressImageBC7SIMD + || func == BC7C::CompressImageBC7SIMD #endif )? 64 diff --git a/Core/src/WorkerQueue.cpp b/Core/src/WorkerQueue.cpp index 7f74e56..f99499f 100644 --- a/Core/src/WorkerQueue.cpp +++ b/Core/src/WorkerQueue.cpp @@ -81,40 +81,39 @@ void WorkerThread::operator()() { bool quitFlag = false; while(!quitFlag) { - switch(m_Parent->AcceptThreadData(m_ThreadIdx)) - { + switch(m_Parent->AcceptThreadData(m_ThreadIdx)) { case eAction_Quit: { - quitFlag = true; - break; + quitFlag = true; + break; } case eAction_Wait: { - TCThread::Yield(); - break; + TCThread::Yield(); + break; } case eAction_DoWork: { - const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx); - uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx); + const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx); + uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx); - CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4); - if(f) - (*f)(cj); - else - (*fStat)(cj, *statManager); + CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4); + if(f) + (*f)(cj); + else + (*fStat)(cj, *statManager); - break; + break; } default: { - fprintf(stderr, "Unrecognized thread command!\n"); - quitFlag = true; - break; + fprintf(stderr, "Unrecognized thread command!\n"); + quitFlag = true; + break; } } } @@ -244,10 +243,10 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) { if(m_NextBlock == totalBlocks) { if(m_NumCompressions < m_TotalNumCompressions) { if(++m_WaitingThreads == m_ActiveThreads) { - m_NextBlock = 0; - m_WaitingThreads = 0; + m_NextBlock = 0; + m_WaitingThreads = 0; } else { - return WorkerThread::eAction_Wait; + return WorkerThread::eAction_Wait; } } else { diff --git a/IO/src/FileStreamWin32.cpp b/IO/src/FileStreamWin32.cpp index e51eb8a..a9e1581 100755 --- a/IO/src/FileStreamWin32.cpp +++ b/IO/src/FileStreamWin32.cpp @@ -1,3 +1,55 @@ +/* FasTC + * Copyright (c) 2012 University of North Carolina at Chapel Hill. + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for educational, research, and non-profit purposes, without + * fee, and without a written agreement is hereby granted, provided that the + * above copyright notice, this paragraph, and the following four paragraphs + * appear in all copies. + * + * Permission to incorporate this software into commercial products may be + * obtained by contacting the authors or the Office of Technology Development + * at the University of North Carolina at Chapel Hill . + * + * This software program and documentation are copyrighted by the University of + * North Carolina at Chapel Hill. The software program and documentation are + * supplied "as is," without any accompanying services from the University of + * North Carolina at Chapel Hill or the authors. The University of North + * Carolina at Chapel Hill and the authors do not warrant that the operation of + * the program will be uninterrupted or error-free. The end-user understands + * that the program was developed for research purposes and is advised not to + * rely exclusively on the program for any reason. + * + * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE + * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA + * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY + * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY + * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON + * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND + * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, + * ENHANCEMENTS, OR MODIFICATIONS. + * + * Please send all BUG REPORTS to . + * + * The authors may be contacted via: + * + * Pavel Krajcevski + * Dept of Computer Science + * 201 S Columbia St + * Frederick P. Brooks, Jr. Computer Science Bldg + * Chapel Hill, NC 27599-3175 + * USA + * + * + */ + #include "FileStream.h" #include @@ -54,7 +106,7 @@ public: : m_ReferenceCount(1) { - DWORD dwDesiredAccess = GENERIC_READ; + DWORD dwDesiredAccess = GENERIC_READ; DWORD dwOpenAction = OPEN_EXISTING; switch(mode) { default: @@ -71,13 +123,13 @@ public: case eFileMode_WriteAppend: case eFileMode_WriteBinaryAppend: - dwDesiredAccess = FILE_APPEND_DATA; + dwDesiredAccess = FILE_APPEND_DATA; dwOpenAction = CREATE_NEW; break; } m_Handle = CreateFile(filename, dwDesiredAccess, 0, NULL, dwOpenAction, FILE_ATTRIBUTE_NORMAL, NULL); - if(m_Handle == INVALID_HANDLE_VALUE) { + if(m_Handle == INVALID_HANDLE_VALUE) { ErrorExit(TEXT("CreateFile")); } } @@ -145,15 +197,14 @@ FileStream::~FileStream() { int32 FileStream::Read(uint8 *buf, uint32 bufSz) { - if( - m_Mode == eFileMode_Write || + if(m_Mode == eFileMode_Write || m_Mode == eFileMode_WriteBinary || m_Mode == eFileMode_WriteAppend || m_Mode == eFileMode_WriteBinaryAppend ) { CHAR errStr[256]; - _sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename); - OutputDebugString(errStr); + _sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename); + OutputDebugString(errStr); return -2; } @@ -163,27 +214,27 @@ int32 FileStream::Read(uint8 *buf, uint32 bufSz) { DWORD oldPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT); if(INVALID_SET_FILE_POINTER == oldPosition) { - CHAR errStr[256]; - _sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError()); - OutputDebugString(errStr); - return -1; + CHAR errStr[256]; + _sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError()); + OutputDebugString(errStr); + return -1; } DWORD amtRead; BOOL success = ReadFile(fp, buf, bufSz, &amtRead, NULL); if(!success) { - CHAR errStr[256]; - _sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename); - OutputDebugString(errStr); + CHAR errStr[256]; + _sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename); + OutputDebugString(errStr); return -1; } DWORD newPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT); if(INVALID_SET_FILE_POINTER == newPosition) { - CHAR errStr[256]; - _sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError()); - OutputDebugString(errStr); - return -1; + CHAR errStr[256]; + _sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError()); + OutputDebugString(errStr); + return -1; } return newPosition - oldPosition; @@ -194,9 +245,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) { m_Mode == eFileMode_Read || m_Mode == eFileMode_ReadBinary ) { - CHAR errStr[256]; - _sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename); - OutputDebugString(errStr); + CHAR errStr[256]; + _sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename); + OutputDebugString(errStr); return -2; } @@ -213,10 +264,10 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) { } if(INVALID_SET_FILE_POINTER == dwPos) { - CHAR errStr[256]; - _sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError()); - OutputDebugString(errStr); - return -1; + CHAR errStr[256]; + _sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError()); + OutputDebugString(errStr); + return -1; } while(!LockFile(fp, dwPos, 0, bufSz, 0)) Sleep(1); @@ -227,9 +278,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) { UnlockFile(fp, dwPos, 0, bufSz, 0); if(!success) { - CHAR errStr[256]; - _sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename); - OutputDebugString(errStr); + CHAR errStr[256]; + _sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename); + OutputDebugString(errStr); return -1; } @@ -244,17 +295,17 @@ int32 FileStream::Tell() { DWORD pos = SetFilePointer(fp, 0, NULL, FILE_CURRENT); if(INVALID_SET_FILE_POINTER == pos) { - CHAR errStr[256]; - _sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError()); - OutputDebugString(errStr); - return -1; + CHAR errStr[256]; + _sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError()); + OutputDebugString(errStr); + return -1; } return pos; } bool FileStream::Seek(uint32 offset, ESeekPosition pos) { - + // We cannot seek in append mode. if(m_Mode == eFileMode_WriteAppend || m_Mode == eFileMode_WriteBinaryAppend) return false; @@ -264,17 +315,17 @@ bool FileStream::Seek(uint32 offset, ESeekPosition pos) { DWORD origin = FILE_BEGIN; switch(pos) { - default: - case eSeekPosition_Beginning: - // Do nothing + default: + case eSeekPosition_Beginning: + // Do nothing break; - case eSeekPosition_Current: - origin = FILE_CURRENT; + case eSeekPosition_Current: + origin = FILE_CURRENT; break; - case eSeekPosition_End: - origin = FILE_END; + case eSeekPosition_End: + origin = FILE_END; break; } diff --git a/IO/src/ImageFile.cpp b/IO/src/ImageFile.cpp index 0fb476c..cd1596e 100644 --- a/IO/src/ImageFile.cpp +++ b/IO/src/ImageFile.cpp @@ -125,7 +125,7 @@ bool ImageFile::Load() { delete m_Image; m_Image = NULL; } - + unsigned char *rawData = ReadFileData(m_Filename); if(rawData) { m_Image = LoadImage(rawData); @@ -272,7 +272,7 @@ unsigned char *ImageFile::ReadFileData(const CHAR *filename) { bool ImageFile::WriteImageDataToFile(const uint8 *data, const uint32 dataSz, const CHAR *filename) { - + // Open a file stream and write out the data... FileStream fstr (filename, eFileMode_WriteBinary); if(fstr.Tell() < 0) { diff --git a/IO/src/ImageLoader.cpp b/IO/src/ImageLoader.cpp index b6dc03f..57886b1 100644 --- a/IO/src/ImageLoader.cpp +++ b/IO/src/ImageLoader.cpp @@ -172,54 +172,54 @@ bool ImageLoader::LoadImage() { // For each block, visit the pixels in sequential order for(uint32 y = i; y < i+4; y++) { - for(uint32 x = j; x < j+4; x++) { + for(uint32 x = j; x < j+4; x++) { - if(y >= m_Height || x >= m_Width) { - m_PixelData[byteIdx++] = 0; // r - m_PixelData[byteIdx++] = 0; // g - m_PixelData[byteIdx++] = 0; // b - m_PixelData[byteIdx++] = 0; // a - continue; - } + if(y >= m_Height || x >= m_Width) { + m_PixelData[byteIdx++] = 0; // r + m_PixelData[byteIdx++] = 0; // g + m_PixelData[byteIdx++] = 0; // b + m_PixelData[byteIdx++] = 0; // a + continue; + } - unsigned int redVal = GetChannelForPixel(x, y, 0); - if(redVal == INT_MAX) - return false; + unsigned int redVal = GetChannelForPixel(x, y, 0); + if(redVal == INT_MAX) + return false; - unsigned int greenVal = redVal; - unsigned int blueVal = redVal; + unsigned int greenVal = redVal; + unsigned int blueVal = redVal; - if(GetGreenChannelPrecision() > 0) { - greenVal = GetChannelForPixel(x, y, 1); - if(greenVal == INT_MAX) - return false; - } + if(GetGreenChannelPrecision() > 0) { + greenVal = GetChannelForPixel(x, y, 1); + if(greenVal == INT_MAX) + return false; + } - if(GetBlueChannelPrecision() > 0) { - blueVal = GetChannelForPixel(x, y, 2); - if(blueVal == INT_MAX) - return false; - } + if(GetBlueChannelPrecision() > 0) { + blueVal = GetChannelForPixel(x, y, 2); + if(blueVal == INT_MAX) + return false; + } - unsigned int alphaVal = 0xFF; - if(GetAlphaChannelPrecision() > 0) { - alphaVal = GetChannelForPixel(x, y, 3); - if(alphaVal == INT_MAX) - return false; - } + unsigned int alphaVal = 0xFF; + if(GetAlphaChannelPrecision() > 0) { + alphaVal = GetChannelForPixel(x, y, 3); + if(alphaVal == INT_MAX) + return false; + } - // Red channel - m_PixelData[byteIdx++] = redVal & 0xFF; + // Red channel + m_PixelData[byteIdx++] = redVal & 0xFF; - // Green channel - m_PixelData[byteIdx++] = greenVal & 0xFF; + // Green channel + m_PixelData[byteIdx++] = greenVal & 0xFF; - // Blue channel - m_PixelData[byteIdx++] = blueVal & 0xFF; + // Blue channel + m_PixelData[byteIdx++] = blueVal & 0xFF; - // Alpha channel - m_PixelData[byteIdx++] = alphaVal & 0xFF; - } + // Alpha channel + m_PixelData[byteIdx++] = alphaVal & 0xFF; + } } } } diff --git a/IO/src/ImageLoaderPNG.cpp b/IO/src/ImageLoaderPNG.cpp index 48f4411..04a4ecc 100644 --- a/IO/src/ImageLoaderPNG.cpp +++ b/IO/src/ImageLoaderPNG.cpp @@ -54,10 +54,8 @@ static void ReportError(const char *msg) { class PNGStreamReader { public: - static void ReadDataFromStream( - png_structp png_ptr, - png_bytep outBytes, - png_size_t byteCountToRead + static void ReadDataFromStream(png_structp png_ptr, + png_bytep outBytes, png_size_t byteCountToRead ) { png_voidp io_ptr = png_get_io_ptr( png_ptr ); if( io_ptr == NULL ) { @@ -120,9 +118,9 @@ bool ImageLoaderPNG::ReadData() { int colorType = -1; if( 1 != png_get_IHDR(png_ptr, info_ptr, - (png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height), - &bitDepth, &colorType, - NULL, NULL, NULL) + (png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height), + &bitDepth, &colorType, + NULL, NULL, NULL) ) { ReportError("Could not read PNG header"); png_destroy_read_struct(&png_ptr, NULL, NULL); @@ -140,33 +138,33 @@ bool ImageLoaderPNG::ReadData() { png_bytep rowData = new png_byte[bpr]; switch(colorType) { - default: - case PNG_COLOR_TYPE_PALETTE: - ReportError("PNG color type unsupported"); - png_destroy_read_struct(&png_ptr, NULL, NULL); - return false; + default: + case PNG_COLOR_TYPE_PALETTE: + ReportError("PNG color type unsupported"); + png_destroy_read_struct(&png_ptr, NULL, NULL); + return false; - case PNG_COLOR_TYPE_GRAY: { + case PNG_COLOR_TYPE_GRAY: { m_RedChannelPrecision = bitDepth; m_RedData = new unsigned char[numPixels]; for(uint32 i = 0; i < m_Height; i++) { - - png_read_row(png_ptr, rowData, NULL); + + png_read_row(png_ptr, rowData, NULL); - unsigned int rowOffset = i * m_Width; - - unsigned int byteIdx = 0; - for(uint32 j = 0; j < m_Width; j++) { - m_RedData[rowOffset + j] = rowData[byteIdx++]; - } + unsigned int rowOffset = i * m_Width; + + unsigned int byteIdx = 0; + for(uint32 j = 0; j < m_Width; j++) { + m_RedData[rowOffset + j] = rowData[byteIdx++]; + } - assert(byteIdx == bpr); + assert(byteIdx == bpr); } } break; - case PNG_COLOR_TYPE_RGB: + case PNG_COLOR_TYPE_RGB: m_RedChannelPrecision = bitDepth; m_RedData = new unsigned char[numPixels]; m_GreenChannelPrecision = bitDepth; @@ -175,23 +173,23 @@ bool ImageLoaderPNG::ReadData() { m_BlueData = new unsigned char[numPixels]; for(uint32 i = 0; i < m_Height; i++) { - - png_read_row(png_ptr, rowData, NULL); + + png_read_row(png_ptr, rowData, NULL); - unsigned int rowOffset = i * m_Width; - - unsigned int byteIdx = 0; - for(uint32 j = 0; j < m_Width; j++) { - m_RedData[rowOffset + j] = rowData[byteIdx++]; - m_GreenData[rowOffset + j] = rowData[byteIdx++]; - m_BlueData[rowOffset + j] = rowData[byteIdx++]; - } + unsigned int rowOffset = i * m_Width; + + unsigned int byteIdx = 0; + for(uint32 j = 0; j < m_Width; j++) { + m_RedData[rowOffset + j] = rowData[byteIdx++]; + m_GreenData[rowOffset + j] = rowData[byteIdx++]; + m_BlueData[rowOffset + j] = rowData[byteIdx++]; + } - assert(byteIdx == bpr); + assert(byteIdx == bpr); } break; - case PNG_COLOR_TYPE_RGB_ALPHA: + case PNG_COLOR_TYPE_RGB_ALPHA: m_RedChannelPrecision = bitDepth; m_RedData = new unsigned char[numPixels]; m_GreenChannelPrecision = bitDepth; @@ -202,42 +200,42 @@ bool ImageLoaderPNG::ReadData() { m_AlphaData = new unsigned char[numPixels]; for(uint32 i = 0; i < m_Height; i++) { - - png_read_row(png_ptr, rowData, NULL); + + png_read_row(png_ptr, rowData, NULL); - unsigned int rowOffset = i * m_Width; - - unsigned int byteIdx = 0; - for(uint32 j = 0; j < m_Width; j++) { - m_RedData[rowOffset + j] = rowData[byteIdx++]; - m_GreenData[rowOffset + j] = rowData[byteIdx++]; - m_BlueData[rowOffset + j] = rowData[byteIdx++]; - m_AlphaData[rowOffset + j] = rowData[byteIdx++]; - } + unsigned int rowOffset = i * m_Width; + + unsigned int byteIdx = 0; + for(uint32 j = 0; j < m_Width; j++) { + m_RedData[rowOffset + j] = rowData[byteIdx++]; + m_GreenData[rowOffset + j] = rowData[byteIdx++]; + m_BlueData[rowOffset + j] = rowData[byteIdx++]; + m_AlphaData[rowOffset + j] = rowData[byteIdx++]; + } - assert(byteIdx == bpr); + assert(byteIdx == bpr); } break; - case PNG_COLOR_TYPE_GRAY_ALPHA: + case PNG_COLOR_TYPE_GRAY_ALPHA: m_RedChannelPrecision = bitDepth; m_RedData = new unsigned char[numPixels]; m_AlphaChannelPrecision = bitDepth; m_AlphaData = new unsigned char[numPixels]; for(uint32 i = 0; i < m_Height; i++) { - - png_read_row(png_ptr, rowData, NULL); + + png_read_row(png_ptr, rowData, NULL); - unsigned int rowOffset = i * m_Width; - - unsigned int byteIdx = 0; - for(uint32 j = 0; j < m_Width; j++) { - m_RedData[rowOffset + j] = rowData[byteIdx++]; - m_AlphaData[rowOffset + j] = rowData[byteIdx++]; - } + unsigned int rowOffset = i * m_Width; + + unsigned int byteIdx = 0; + for(uint32 j = 0; j < m_Width; j++) { + m_RedData[rowOffset + j] = rowData[byteIdx++]; + m_AlphaData[rowOffset + j] = rowData[byteIdx++]; + } - assert(byteIdx == bpr); + assert(byteIdx == bpr); } break; } diff --git a/IO/src/ImageWriterPNG.cpp b/IO/src/ImageWriterPNG.cpp index 8ada946..2419208 100644 --- a/IO/src/ImageWriterPNG.cpp +++ b/IO/src/ImageWriterPNG.cpp @@ -66,87 +66,87 @@ public: ImageWriterPNG &writer = *(ImageWriterPNG *)(io_ptr); - while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) { - uint8 *newData = new uint8[writer.m_RawFileDataSz << 1]; - memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz); - writer.m_RawFileDataSz <<= 1; - delete writer.m_RawFileData; - writer.m_RawFileData = newData; - } + while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) { + uint8 *newData = new uint8[writer.m_RawFileDataSz << 1]; + memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz); + writer.m_RawFileDataSz <<= 1; + delete writer.m_RawFileData; + writer.m_RawFileData = newData; + } - unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]); + unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]); memcpy(stream, outBytes, byteCountToWrite); writer.m_StreamPosition += byteCountToWrite; } - static void FlushStream(png_structp png_ptr) { /* Do nothing... */ } + static void FlushStream(png_structp png_ptr) { /* Do nothing... */ } }; ImageWriterPNG::ImageWriterPNG(const Image &im) - : ImageWriter(im.GetWidth(), im.GetHeight(), im.RawData()) + : ImageWriter(im.GetWidth(), im.GetHeight(), im.RawData()) , m_StreamPosition(0) { } bool ImageWriterPNG::WriteImage() { - png_structp png_ptr = NULL; - png_infop info_ptr = NULL; - png_byte ** row_pointers = NULL; - int pixel_size = 4; - int depth = 8; + png_structp png_ptr = NULL; + png_infop info_ptr = NULL; + png_byte ** row_pointers = NULL; + int pixel_size = 4; + int depth = 8; - png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (png_ptr == NULL) { - return false; - } + png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + if (png_ptr == NULL) { + return false; + } - info_ptr = png_create_info_struct (png_ptr); - if (info_ptr == NULL) { - png_destroy_write_struct (&png_ptr, &info_ptr); - return false; - } + info_ptr = png_create_info_struct (png_ptr); + if (info_ptr == NULL) { + png_destroy_write_struct (&png_ptr, &info_ptr); + return false; + } - /* Set image attributes. */ + /* Set image attributes. */ - png_set_IHDR (png_ptr, - info_ptr, - m_Width, - m_Height, - depth, - PNG_COLOR_TYPE_RGBA, - PNG_INTERLACE_NONE, - PNG_COMPRESSION_TYPE_DEFAULT, - PNG_FILTER_TYPE_DEFAULT); + png_set_IHDR (png_ptr, + info_ptr, + m_Width, + m_Height, + depth, + PNG_COLOR_TYPE_RGBA, + PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, + PNG_FILTER_TYPE_DEFAULT); - /* Initialize rows of PNG. */ + /* Initialize rows of PNG. */ - row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *)); - for (uint32 y = 0; y < m_Height; ++y) { - png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size); + row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *)); + for (uint32 y = 0; y < m_Height; ++y) { + png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size); - row_pointers[y] = row; + row_pointers[y] = row; - for (uint32 x = 0; x < m_Width; ++x) { - for(uint32 ch = 0; ch < 4; ch++) { - *row++ = GetChannelForPixel(x, y, ch); - } + for (uint32 x = 0; x < m_Width; ++x) { + for(uint32 ch = 0; ch < 4; ch++) { + *row++ = GetChannelForPixel(x, y, ch); + } } - } + } - png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream); - png_set_rows (png_ptr, info_ptr, row_pointers); - png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL); + png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream); + png_set_rows (png_ptr, info_ptr, row_pointers); + png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL); - for (uint32 y = 0; y < m_Height; y++) { - png_free (png_ptr, row_pointers[y]); - } - png_free (png_ptr, row_pointers); + for (uint32 y = 0; y < m_Height; y++) { + png_free (png_ptr, row_pointers[y]); + } + png_free (png_ptr, row_pointers); - png_destroy_write_struct (&png_ptr, &info_ptr); + png_destroy_write_struct (&png_ptr, &info_ptr); - m_RawFileDataSz = m_StreamPosition; - return true; + m_RawFileDataSz = m_StreamPosition; + return true; } diff --git a/IO/src/ImageWriterPNG.h b/IO/src/ImageWriterPNG.h index 90b99e0..6b67059 100644 --- a/IO/src/ImageWriterPNG.h +++ b/IO/src/ImageWriterPNG.h @@ -55,8 +55,8 @@ class ImageWriterPNG : public ImageWriter { virtual bool WriteImage(); private: - uint32 m_StreamPosition; - friend class PNGStreamWriter; + uint32 m_StreamPosition; + friend class PNGStreamWriter; }; #endif // _IMAGE_LOADER_H_