From 345292e36a27d79f8bf83e7528bb9cfa692c06d3 Mon Sep 17 00:00:00 2001 From: Pavel Krajcevski Date: Sun, 13 Oct 2013 19:28:41 -0400 Subject: [PATCH] Grab compressor from experimental branch --- PVRTCEncoder/src/Compressor.cpp | 801 +++++++++++++++++++++++++------- 1 file changed, 639 insertions(+), 162 deletions(-) diff --git a/PVRTCEncoder/src/Compressor.cpp b/PVRTCEncoder/src/Compressor.cpp index db2113e..24d94b8 100644 --- a/PVRTCEncoder/src/Compressor.cpp +++ b/PVRTCEncoder/src/Compressor.cpp @@ -54,10 +54,13 @@ #include #include +#include +#include #include #include #include "Pixel.h" +#include "Color.h" #include "PVRTCImage.h" #include "Block.h" @@ -86,211 +89,685 @@ namespace PVRTCC { return x | (y << 1); } - template - static T Clamp(const T &v, const T &low, const T &high) { - return ::std::min(::std::max(low, v), high); + struct Label { + uint8 distance; + uint8 nLabels; + static const int kMaxNumIdxs = 16; + uint8 times[kMaxNumIdxs]; + uint32 idxs[kMaxNumIdxs]; + float blockIntensity; + + void AddIdx(uint32 idx) { + for(uint32 i = 0; i < nLabels; i++) { + if(idxs[i] == idx) { + times[i]++; + return; + } + } + + assert(nLabels < kMaxNumIdxs); + times[nLabels] = 1; + idxs[nLabels] = idx; + nLabels++; + } + + void Combine(const Label &other) { + for(uint32 i = 0; i < other.nLabels; i++) { + AddIdx(other.idxs[i]); + } + } + }; + + struct CompressionLabel { + bool bCachedIntensity; + float intensity; + Label highLabel; + Label lowLabel; + }; + + static float LookupIntensity(CompressionLabel *labels, const uint32 *pixels, + const uint32 idx) { + if(labels[idx].bCachedIntensity) { + return labels[idx].intensity; + } + + uint32 pixel = pixels[idx]; + const float a = static_cast((pixel >> 24) & 0xFF) / 255.0f; + const float r = a * static_cast(pixel & 0xFF) / 255.0f; + const float g = a * static_cast((pixel >> 8) & 0xFF) / 255.0f; + const float b = a * static_cast((pixel >> 16) & 0xFF) / 255.0f; + + labels[idx].intensity = r * 0.2126f + g * 0.7152f + b * 0.0722f; + labels[idx].bCachedIntensity = true; + return labels[idx].intensity; } - static const Pixel &Lookup(const Image &img, - int32 x, int32 y, - uint32 width, uint32 height, - const EWrapMode wrapMode) { - int32 w = static_cast(width); - int32 h = static_cast(height); + enum EExtremaResult { + eExtremaResult_Neither, + eExtremaResult_LocalMin, + eExtremaResult_LocalMax + }; - assert(w >= 0); - assert(h >= 0); + static EExtremaResult ComputeLocalExtrema( + CompressionLabel *labels, const uint8 *inBuf, + const uint32 x, const uint32 y, const uint32 width, const uint32 height) { - while(x >= w) { - x = (wrapMode == eWrapMode_Wrap)? x - w : w - 1; + assert(x < width); + assert(y < height); + + const uint32 *pixels = reinterpret_cast(inBuf); + uint8 i0 = static_cast(255.0f * LookupIntensity(labels, pixels, y*width + x) + 0.5f); + + int32 ng = 0; + int32 nl = 0; + + const int32 kKernelSz = 3; + const int32 kHalfKernelSz = kKernelSz >> 1; + for(int32 j = -kHalfKernelSz; j <= kHalfKernelSz; j++) + for(int32 i = -kHalfKernelSz; i <= kHalfKernelSz; i++) { + + if(i == 0 && j == 0) continue; + + int32 xx = (i + static_cast(x + width)) % width; + int32 yy = (j + static_cast(y + height)) % height; + + assert(xx >= 0 && xx < static_cast(width)); + assert(yy >= 0 && yy < static_cast(height)); + + uint32 idx = static_cast(xx) + width * static_cast(yy); + uint8 ix = static_cast(255.0f * LookupIntensity(labels, pixels, idx) + 0.5f); + if(ix >= i0) { + ng++; + } + + if(ix <= i0) { + nl++; + } } - while(x < 0) { - x = (wrapMode == eWrapMode_Wrap)? x + w : 0; + EExtremaResult result = eExtremaResult_Neither; + if(ng == nl) { + return result; } - while(y >= h) { - y = (wrapMode == eWrapMode_Wrap)? y - h : h - 1; + CompressionLabel &l = labels[y*width + x]; + const int32 kThreshold = kKernelSz * kKernelSz - 1; + if(ng >= kThreshold) { + l.lowLabel.distance = 1; + l.lowLabel.AddIdx(y*width+x); + result = eExtremaResult_LocalMin; + } else if(nl >= kThreshold) { + l.highLabel.distance = 1; + l.highLabel.AddIdx(y*width+x); + result = eExtremaResult_LocalMax; } - while(y < 0) { - y = (wrapMode == eWrapMode_Wrap)? y + h : 0; - } - - return img(x, y); + return result; } - void Compress(const CompressionJob &dcj, - bool bTwoBitMode, - const EWrapMode wrapMode) { - Image img(dcj.height, dcj.width); - uint32 nPixels = dcj.height * dcj.width; - for(uint32 i = 0; i < nPixels; i++) { - uint32 x = i % dcj.width; - uint32 y = i / dcj.width; + static void DilateLabelForward(Label &l, const Label &up, const Label &left) { - const uint32 *pixels = reinterpret_cast(dcj.inBuf); - img(x, y).Unpack(pixels[i]); + if(l.distance == 1) { + return; } - Image original = img; - img.DebugOutput("Original"); + if(l.nLabels == 0) { + l.nLabels = 0; + } - // Downscale it using anisotropic diffusion based scheme in order to preserve - // image features, then reupscale and compute deltas. Use deltas to generate - // initial A & B images followed by modulation data. - img.ContentAwareDownscale(1, 1, eWrapMode_Wrap, true); - img.ContentAwareDownscale(1, 1, eWrapMode_Wrap, false); + // Are we in no position to dilate? + if(up.distance == 0 && left.distance == 0) { + return; + } - Image downscaled = img; + // Have we visited up yet? + if(up.distance == 0) { + if(left.distance < 4) { + l.distance = left.distance + 1; + l.AddIdx(left.idxs[0]); + } + return; + } - // Upscale it again - img.BilinearUpscale(2, 2, eWrapMode_Wrap); + // Have we visited left yet? + if(left.distance == 0) { + if(up.distance < 4) { + l.distance = up.distance + 1; + l.AddIdx(up.idxs[0]); + } + return; + } - img.DebugOutput("Reconstruction"); + // Otherwise, if they're the same, then we're at a corner... + if(left.distance == up.distance) { + if(left.idxs[0] == up.idxs[0]) { + l.distance = left.distance; + l.AddIdx(left.idxs[0]); + } else { + if(up.distance < 4) { + l.distance = up.distance + 1; + l.AddIdx(up.idxs[0]); + } + } + return; + } - // Compute difference... - ::std::vector difference; - difference.resize(dcj.height * dcj.width * 4); - for(uint32 j = 0; j < dcj.height; j++) { - for(uint32 i = 0; i < dcj.width; i++) { - for(uint32 c = 0; c < 4; c++) { - int16 o = original(i, j).Component(c); - int16 n = img(i, j).Component(c); - difference[j*dcj.width*4 + i*4 + c] = o - n; + // Otherwise, we're at a disjoint part, so take the minimum and add + // one to the distance and assume their index... + if(left.distance < up.distance) { + l.distance = left.distance + 1; + l.AddIdx(left.idxs[0]); + } else { // up.distance < left.distance + l.distance = up.distance + 1; + l.AddIdx(up.idxs[0]); + } + } + + static void LabelImageForward(CompressionLabel *labels, + const uint8 *inBuf, + const uint32 w, const uint32 h) { + for(uint32 j = 0; j < h+3; j++) { + for(uint32 i = 0; i < w; i++) { + EExtremaResult result = ComputeLocalExtrema(labels, inBuf, i, j % h, w, h); + bool dilateMax = result != eExtremaResult_LocalMax; + bool dilateMin = result != eExtremaResult_LocalMin; + + if(dilateMax || dilateMin) { + // Look up and to the left to determine the distance... + uint32 upIdx = ((j+h-1) % h) * w + i; + uint32 leftIdx = (j % h) * w + ((i+w-1) % w); + + CompressionLabel &l = labels[(j % h)*w + i]; + CompressionLabel &up = labels[upIdx]; + CompressionLabel &left = labels[leftIdx]; + + if(dilateMax) { + DilateLabelForward(l.highLabel, up.highLabel, left.highLabel); + } + + if(dilateMin) { + DilateLabelForward(l.lowLabel, up.lowLabel, left.lowLabel); + } } } } + } - const uint32 blocksW = dcj.width / 4; - const uint32 blocksH = dcj.height / 4; + static void DilateLabelBackward(Label &l, + CompressionLabel *neighbors[5], + bool bHighLabel) { + if(l.distance == 1) + return; - // Go over the 7x7 texel blocks and extract bounding box diagonals for each - // block. We should be able to choose which diagonal we want... - const int32 kKernelSz = 7; + const Label *nbs[5] = { + bHighLabel? &(neighbors[0]->highLabel) : &(neighbors[0]->lowLabel), + bHighLabel? &(neighbors[1]->highLabel) : &(neighbors[1]->lowLabel), + bHighLabel? &(neighbors[2]->highLabel) : &(neighbors[2]->lowLabel), + bHighLabel? &(neighbors[3]->highLabel) : &(neighbors[3]->lowLabel), + bHighLabel? &(neighbors[4]->highLabel) : &(neighbors[4]->lowLabel) + }; + + // Figure out which labels are closest... + uint8 minDist = 5; + for(uint32 i = 0; i < 5; i++) { + if(nbs[i]->distance > 0) + minDist = ::std::min(nbs[i]->distance, minDist); + } + + assert(minDist > 0); + + uint8 newDist = minDist + 1; + if((l.distance != 0 && l.distance < newDist) || newDist > 4) { + return; + } + + if(l.distance != newDist) { + l.nLabels = 0; + } + + for(uint32 i = 0; i < 5; i++) { + if(nbs[i]->distance == minDist) { + l.Combine(*nbs[i]); + } + } + l.distance = newDist; + } + + static void LabelImageBackward(CompressionLabel *labels, + const uint32 w, const uint32 h) { + CompressionLabel *neighbors[5] = { 0 }; + for(int32 j = static_cast(h)+2; j >= 0; j--) { + for(int32 i = static_cast(w)-1; i >= 0; i--) { + + CompressionLabel &l = labels[(j % h) * w + i]; + + // Add top right corner + neighbors[0] = &(labels[((j + h - 1) % h) * w + ((i + 1) % w)]); + + // Add right label + neighbors[1] = &(labels[(j % h) * w + ((i + 1) % w)]); + + // Add bottom right label + neighbors[2] = &(labels[((j + 1) % h) * w + ((i + 1) % w)]); + + // Add bottom label + neighbors[3] = &(labels[((j + 1) % h) * w + i]); + + // Add bottom left label + neighbors[4] = &(labels[((j + 1) % h) * w + ((i + w - 1) % w)]); + + DilateLabelBackward(l.highLabel, neighbors, true); + DilateLabelBackward(l.lowLabel, neighbors, false); + } + } + } + + static FasTC::Color CollectLabel(const uint32 *pixels, const Label &label) { + FasTC::Color ret; + uint32 nPs = 0; + for(uint32 p = 0; p < label.nLabels; p++) { + FasTC::Color c; c.Unpack(pixels[label.idxs[p]]); + ret += c * label.times[p]; + nPs += label.times[p]; + } + ret /= nPs; + return ret; + } + + static void GenerateLowHighImages(CompressionLabel *labels, + const uint8 *inBuf, uint8 *outBuf, + const uint32 w, const uint32 h) { + assert((w % 4) == 0); + assert((h % 4) == 0); + + uint32 blocksW = w / 4; + uint32 blocksH = h / 4; + + FasTC::Color blockColors[2][16]; + const uint32 *pixels = reinterpret_cast(inBuf); - Image imgA = downscaled; - Image imgB = downscaled; for(uint32 j = 0; j < blocksH; j++) { for(uint32 i = 0; i < blocksW; i++) { - int32 startX = i*4 + 2 - (kKernelSz / 2); - int32 startY = j*4 + 2 - (kKernelSz / 2); - for(int32 y = startY; y < startY + kKernelSz; y++) { - for(int32 x = startX; x < startX + kKernelSz; x++) { - const Pixel &po = Lookup(original, x, y, dcj.width, dcj.height, wrapMode); - Pixel &pa = imgA(i, j); - Pixel &pb = imgB(i, j); - for(uint32 c = 0; c < 4; c++) { - pa.Component(c) = ::std::max(po.Component(c), pa.Component(c)); - pb.Component(c) = ::std::min(po.Component(c), pb.Component(c)); - } - } - } - } - } - imgA.DebugOutput("ImageA"); - imgB.DebugOutput("ImageB"); + float minIntensity = 1.1, maxIntensity = -0.1; + uint32 minIntensityIdx = 0, maxIntensityIdx = 0; + for(uint32 y = j*4; y <= (j+1)*4; y++) + for(uint32 x = i*4; x <= (i+1)*4; x++) { - // Determine modulation values... - Image upA = imgA; - Image upB = imgB; - - upA.BilinearUpscale(2, 2, wrapMode); - upB.BilinearUpscale(2, 2, wrapMode); - - assert(upA.GetHeight() == dcj.height && upA.GetWidth() == dcj.width); - assert(upB.GetHeight() == dcj.height && upB.GetWidth() == dcj.width); - - upA.DebugOutput("UpscaledA"); - upB.DebugOutput("UpscaledB"); - - // Choose the most appropriate modulation values for the two images... - ::std::vector modValues; - modValues.resize(dcj.width * dcj.height); - for(uint32 j = 0; j < dcj.height; j++) { - for(uint32 i = 0; i < dcj.width; i++) { - uint8 &mv = modValues[j * dcj.width + i]; - - const Pixel pa = upA(i, j); - const Pixel pb = upB(i, j); - const Pixel po = original(i, j); - - // !FIXME! there are two modulation modes... we're only using one. - uint8 modSteps[4] = { 8, 5, 3, 0 }; - uint8 bestMod = 0; - uint32 bestError = 0xFFFFFFFF; - for(uint32 s = 0; s < 4; s++) { - uint32 error = 0; - for(uint32 c = 0; c < 4; c++) { - uint16 va = static_cast(pa.Component(c)); - uint16 vb = static_cast(pb.Component(c)); - uint16 vo = static_cast(po.Component(c)); - - uint16 lerpVal = modSteps[s]; - uint16 res = (va * (8 - lerpVal) + vb * lerpVal) / 8; - uint16 e = (res > vo)? res - vo : vo - res; - error += e * e; + uint32 idx = (y%h)*w + (x%w); + float intensity = labels[idx].intensity; + if(intensity < minIntensity) { + minIntensity = intensity; + minIntensityIdx = idx; } - if(error < bestError) { - bestError = error; - bestMod = s; + if(intensity > maxIntensity) { + maxIntensity = intensity; + maxIntensityIdx = idx; + } + + if(x == ((i + 1) * 4) || y == ((j + 1) * 4)) + continue; + + uint32 localIdx = (y-(j*4))*4 + x-(i*4); + assert(localIdx < 16); + + if(labels[idx].highLabel.distance > 0) { + blockColors[0][localIdx] = CollectLabel(pixels, labels[idx].highLabel); + } else { + // Mark the color as unused + blockColors[0][localIdx].A() = -1.0f; + } + + if(labels[idx].lowLabel.distance > 0) { + blockColors[1][localIdx] = CollectLabel(pixels, labels[idx].lowLabel); + } else { + // Mark the color as unused + blockColors[1][localIdx].A() = -1.0f; } } - mv = bestMod; - } - } + // Average all of the values together now... + FasTC::Color high, low; + for(uint32 y = 0; y < 4; y++) + for(uint32 x = 0; x < 4; x++) { + uint32 idx = y * 4 + x; + FasTC::Color c = blockColors[0][idx]; + if(c.A() < 0.0f) { + c.Unpack(pixels[maxIntensityIdx]); + } + high += c * (1.0f / 16.0f); - Image modulationImg(dcj.height, dcj.width); - for(uint32 j = 0; j < dcj.height; j++) { - for(uint32 i = 0; i < dcj.width; i++) { - uint32 idx = j * dcj.width + i; - uint8 modVal = static_cast((static_cast(modValues[idx]) / 3.0f) * 255.0f); - - Pixel p; - for(uint32 c = 1; c < 4; c++) { - p.Component(c) = modVal; + c = blockColors[1][idx]; + if(c.A() < 0.0f) { + c.Unpack(pixels[minIntensityIdx]); + } + low += c * (1.0f / 16.0f); } - p.A() = 255; - modulationImg(i, j) = p; - } - } - modulationImg.DebugOutput("Modulation"); - // Pack everything into a PVRTC blocks. - assert(imgA.GetHeight() == blocksH); - assert(imgA.GetWidth() == blocksW); - - std::vector blocks; - blocks.reserve(blocksW * blocksH); - for(uint32 j = 0; j < blocksH; j++) { - for(uint32 i = 0; i < blocksW; i++) { + // Store them as our endpoints for this block... Block b; - b.SetColorA(imgA(i, j), true); - b.SetColorB(imgB(i, j), true); - for(uint32 t = 0; t < 16; t++) { - uint32 x = i*4 + (t%4); - uint32 y = j*4 + (t/4); - b.SetLerpValue(t, modValues[y*dcj.width + x]); - } - blocks.push_back(b.Pack()); - } - } + FasTC::Pixel p; + p.Unpack(high.Pack()); + b.SetColorA(p); - // Spit out the blocks... - for(uint32 j = 0; j < blocksH; j++) { - for(uint32 i = 0; i < blocksW; i++) { + p.Unpack(low.Pack()); + b.SetColorB(p); - // The blocks are initially arranged in morton order. Let's - // linearize them... - uint32 idx = Interleave(j, i); - - uint64 *outPtr = reinterpret_cast(dcj.outBuf); - outPtr[idx] = blocks[j*blocksW + i]; + uint64 *outBlocks = reinterpret_cast(outBuf); + outBlocks[j * blocksW + i] = b.Pack(); } } } + static FasTC::Pixel BilerpPixels(uint32 x, uint32 y, + const FasTC::Pixel &p, FasTC::Pixel &fp, + const FasTC::Pixel &topLeft, + const FasTC::Pixel &topRight, + const FasTC::Pixel &bottomLeft, + const FasTC::Pixel &bottomRight) { + + const uint32 highXWeight = x; + const uint32 lowXWeight = 4 - x; + const uint32 highYWeight = y; + const uint32 lowYWeight = 4 - y; + + const uint32 topLeftWeight = lowXWeight * lowYWeight; + const uint32 topRightWeight = highXWeight * lowYWeight; + const uint32 bottomLeftWeight = lowXWeight * highYWeight; + const uint32 bottomRightWeight = highXWeight * highYWeight; + + // bilerp each channel.... + const FasTC::Pixel tl = topLeft * topLeftWeight; + const FasTC::Pixel tr = topRight * topRightWeight; + const FasTC::Pixel bl = bottomLeft * bottomLeftWeight; + const FasTC::Pixel br = bottomRight * bottomRightWeight; + const FasTC::Pixel sum = tl + tr + bl + br; + + for(uint32 c = 0; c < 4; c++) { + fp.Component(c) = sum.Component(c) & 15; + } + + FasTC::Pixel tmp(p); + tmp = sum / (16); + + const uint8 fullDepth[4] = { 8, 8, 8, 8 }; + tmp.ChangeBitDepth(fullDepth); + + const uint8 currentDepth[4] = { 4, 5, 5, 5 }; + const uint8 fractionDepth[4] = { 4, 4, 4, 4 }; + + for(uint32 c = 0; c < 4; c++) { + const uint32 denominator = (1 << currentDepth[c]); + const uint32 numerator = denominator + 1; + + const uint32 shift = fractionDepth[c] - (fullDepth[c] - currentDepth[c]); + const uint32 fractionBits = tmp.Component(c) >> shift; + + uint32 component = p.Component(c); + component += ((fractionBits * numerator) / denominator); + + tmp.Component(c) = component; + } + + return tmp; + } + + static void ChangePixelTo4555(FasTC::Pixel &p) { + uint8 refDepths[4]; + p.GetBitDepth(refDepths); + + const uint8 scaleDepths[4] = { 4, 5, 5, 5 }; + p.ChangeBitDepth(scaleDepths); + + if(refDepths[0] > 0) { + p.A() = p.A() & 0xFE; + } + } + + static void GenerateModulationValues(uint8 *outBuf, const uint8 *inBuf, uint32 w, uint32 h) { + // Start from the beginning block and generate the lerp values for the intermediate values + uint64 *outBlocks = reinterpret_cast(outBuf); + const uint32 blocksW = w >> 2; + const uint32 blocksH = h >> 2; + + const uint32 *pixels = reinterpret_cast(inBuf); + + // Make sure the bit depth matches the original... + FasTC::Pixel p; + uint8 bitDepth[4] = { 4, 5, 5, 5 }; + p.ChangeBitDepth(bitDepth); + + // Save fractional bits + FasTC::Pixel fp; + uint8 fpDepths[4] = { 4, 4, 4, 4 }; + fp.ChangeBitDepth(fpDepths); + + for(uint32 j = 0; j < blocksH; j++) { + for(uint32 i = 0; i < blocksW; i++) { + + const int32 lowXIdx = i; + const int32 highXIdx = (i + 1) % w; + const int32 lowYIdx = j; + const int32 highYIdx = (j + 1) % h; + + const uint32 topLeftBlockIdx = lowYIdx * blocksW + lowXIdx; + const uint32 topRightBlockIdx = lowYIdx * blocksW + highXIdx; + const uint32 bottomLeftBlockIdx = highYIdx * blocksW + lowXIdx; + const uint32 bottomRightBlockIdx = highYIdx * blocksW + highXIdx; + + Block topLeftBlock(reinterpret_cast(outBlocks + topLeftBlockIdx)); + Block topRightBlock(reinterpret_cast(outBlocks + topRightBlockIdx)); + Block bottomLeftBlock(reinterpret_cast(outBlocks + bottomLeftBlockIdx)); + Block bottomRightBlock(reinterpret_cast(outBlocks + bottomRightBlockIdx)); + + FasTC::Pixel topLeftA (topLeftBlock.GetColorA()); + FasTC::Pixel topLeftB (topLeftBlock.GetColorB()); + + FasTC::Pixel topRightA (topRightBlock.GetColorA()); + FasTC::Pixel topRightB (topRightBlock.GetColorB()); + + FasTC::Pixel bottomLeftA (bottomLeftBlock.GetColorA()); + FasTC::Pixel bottomLeftB (bottomLeftBlock.GetColorB()); + + FasTC::Pixel bottomRightA (topLeftBlock.GetColorA()); + FasTC::Pixel bottomRightB (topLeftBlock.GetColorB()); + + ChangePixelTo4555(topLeftA); + ChangePixelTo4555(topLeftB); + + ChangePixelTo4555(topRightA); + ChangePixelTo4555(topRightB); + + ChangePixelTo4555(bottomLeftA); + ChangePixelTo4555(bottomLeftB); + + ChangePixelTo4555(bottomRightA); + ChangePixelTo4555(bottomRightB); + + for(uint32 x = 0; x < 4; x++) { + for(uint32 y = 0; y < 4; y++) { + uint32 pixelX = (i + 2 + x) % w; + uint32 pixelY = (j + 2 + y) % h; + FasTC::Pixel colorA = BilerpPixels(x, y, p, fp, topLeftA, topRightA, bottomLeftA, bottomRightA); + FasTC::Pixel colorB = BilerpPixels(x, y, p, fp, topLeftB, topRightB, bottomLeftB, bottomRightB); + FasTC::Pixel original(pixels[pixelY * w + pixelX]); + + // !FIXME! there are two modulation modes... we're only using one. + uint8 modSteps[4] = { 8, 5, 3, 0 }; + uint8 bestMod = 0; + uint32 bestError = 0xFFFFFFFF; + for(uint32 s = 0; s < 4; s++) { + uint16 lerpVal = modSteps[s]; + FasTC::Pixel result = (colorA * (8 - lerpVal) + colorB * lerpVal) / 8; + + FasTC::Vector4 errorVec; + for(uint32 c = 0; c < 4; c++) { + int32 r = result.Component(c); + int32 o = original.Component(c); + errorVec[c] = r - o; + } + uint32 error = static_cast(errorVec.LengthSq()); + + if(error < bestError) { + bestError = error; + bestMod = s; + } + } + + Block *pixelBlock = &topLeftBlock; + uint32 pixelBlockIdx = topLeftBlockIdx; + if(x > 1) { + if(y > 1) { + pixelBlock = &bottomRightBlock; + pixelBlockIdx = bottomRightBlockIdx; + } else { + pixelBlock = &topRightBlock; + pixelBlockIdx = topRightBlockIdx; + } + } else if(y > 1) { + pixelBlock = &bottomLeftBlock; + pixelBlockIdx = bottomLeftBlockIdx; + } + + pixelBlock->SetLerpValue((pixelY % 4) * 4 + (pixelX % 4), bestMod); + outBlocks[pixelBlockIdx] = outBlocks[pixelBlockIdx] | pixelBlock->Pack(); + } + } + } + } + } + + void Compress(const CompressionJob &cj, bool bTwoBit, EWrapMode wrapMode) { + const uint32 width = cj.width; + const uint32 height = cj.height; + + memset(cj.outBuf, 0, (width * height / 16) * kBlockSize); + + CompressionLabel *labels = + (CompressionLabel *)calloc(width * height, sizeof(CompressionLabel)); + + // First traverse forward... + LabelImageForward(labels, cj.inBuf, width, height); + +#ifndef NDEBUG + Image highForwardLabels(width, height); + Image lowForwardLabels(width, height); + + const FasTC::Color kLabelPalette[4] = { + FasTC::Color(0.0, 0.0, 1.0, 1.0), + FasTC::Color(1.0, 0.0, 1.0, 1.0), + FasTC::Color(1.0, 0.0, 0.0, 1.0), + FasTC::Color(1.0, 1.0, 0.0, 1.0) + }; + + for(uint32 j = 0; j < height; j++) { + for(uint32 i = 0; i < width; i++) { + const CompressionLabel &l = labels[j*width + i]; + + const Label &hl = l.highLabel; + if(hl.distance > 0) { + highForwardLabels(i, j).Unpack(kLabelPalette[hl.distance-1].Pack()); + } + + const Label &ll = l.lowLabel; + if(ll.distance > 0) { + lowForwardLabels(i, j).Unpack(kLabelPalette[ll.distance-1].Pack()); + } + } + } + + highForwardLabels.DebugOutput("HighForwardLabels"); + lowForwardLabels.DebugOutput("LowForwardLabels"); + + Image highForwardImg(width, height); + Image lowForwardImg(width, height); + const uint32 *pixels = reinterpret_cast(cj.inBuf); + for(uint32 j = 0; j < height; j++) { + for(uint32 i = 0; i < width; i++) { + const CompressionLabel &l = labels[j*width + i]; + + const Label &hl = l.highLabel; + if(hl.distance > 0) { + FasTC::Color c; + uint32 nPs = 0; + for(uint32 p = 0; p < hl.nLabels; p++) { + FasTC::Color pc; pc.Unpack(pixels[hl.idxs[p]]); + c += pc * static_cast(hl.times[p]); + nPs += hl.times[p]; + } + c /= nPs; + highForwardImg(i, j).Unpack(c.Pack()); + } + + const Label &ll = l.lowLabel; + if(ll.distance > 0) { + FasTC::Color c; + uint32 nPs = 0; + for(uint32 p = 0; p < ll.nLabels; p++) { + FasTC::Color pc; pc.Unpack(pixels[ll.idxs[p]]); + c += pc * static_cast(ll.times[p]); + nPs += ll.times[p]; + } + c /= nPs; + lowForwardImg(i, j).Unpack(c.Pack()); + } + } + } + + highForwardImg.DebugOutput("HighForwardImg"); + lowForwardImg.DebugOutput("LowForwardImg"); + + std::cout << "Output Forward images." << std::endl; +#endif + + // Then traverse backward... + LabelImageBackward(labels, width, height); + +#ifndef NDEBUG + Image highImg(width, height); + Image lowImg(width, height); + for(uint32 j = 0; j < height; j++) { + for(uint32 i = 0; i < width; i++) { + const CompressionLabel &l = labels[j*width + i]; + + const Label &hl = l.highLabel; + if(hl.distance > 0) { + FasTC::Color c; + for(uint32 p = 0; p < hl.nLabels; p++) { + FasTC::Color pc; pc.Unpack(pixels[hl.idxs[p]]); + c += pc; + } + c /= hl.nLabels; + highImg(i, j).Unpack(c.Pack()); + } + + const Label &ll = l.lowLabel; + if(ll.distance > 0) { + FasTC::Color c; + for(uint32 p = 0; p < ll.nLabels; p++) { + FasTC::Color pc; pc.Unpack(pixels[ll.idxs[p]]); + c += pc; + } + c /= ll.nLabels; + lowImg(i, j).Unpack(c.Pack()); + } + } + } + + highImg.DebugOutput("HighImg"); + lowImg.DebugOutput("LowImg"); + + std::cout << "Output images." << std::endl; +#endif + + // Then combine everything... + GenerateLowHighImages(labels, cj.inBuf, cj.outBuf, width, height); + + // Then compute modulation values + GenerateModulationValues(cj.outBuf, cj.inBuf, width, height); + + // Cleanup + free(labels); + } } // namespace PVRTCC