mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-08 05:55:35 +00:00
Grab compressor from experimental branch
This commit is contained in:
parent
f1b564fdb2
commit
345292e36a
|
@ -54,10 +54,13 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "Pixel.h"
|
||||
#include "Color.h"
|
||||
#include "PVRTCImage.h"
|
||||
#include "Block.h"
|
||||
|
||||
|
@ -86,211 +89,685 @@ namespace PVRTCC {
|
|||
return x | (y << 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static T Clamp(const T &v, const T &low, const T &high) {
|
||||
return ::std::min(::std::max(low, v), high);
|
||||
struct Label {
|
||||
uint8 distance;
|
||||
uint8 nLabels;
|
||||
static const int kMaxNumIdxs = 16;
|
||||
uint8 times[kMaxNumIdxs];
|
||||
uint32 idxs[kMaxNumIdxs];
|
||||
float blockIntensity;
|
||||
|
||||
void AddIdx(uint32 idx) {
|
||||
for(uint32 i = 0; i < nLabels; i++) {
|
||||
if(idxs[i] == idx) {
|
||||
times[i]++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
assert(nLabels < kMaxNumIdxs);
|
||||
times[nLabels] = 1;
|
||||
idxs[nLabels] = idx;
|
||||
nLabels++;
|
||||
}
|
||||
|
||||
void Combine(const Label &other) {
|
||||
for(uint32 i = 0; i < other.nLabels; i++) {
|
||||
AddIdx(other.idxs[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct CompressionLabel {
|
||||
bool bCachedIntensity;
|
||||
float intensity;
|
||||
Label highLabel;
|
||||
Label lowLabel;
|
||||
};
|
||||
|
||||
// Returns the alpha-weighted intensity of pixels[idx]. The value is computed
// the first time it is requested and cached in labels[idx] afterwards.
// The packed pixel is read as: bits 0-7 red, 8-15 green, 16-23 blue,
// 24-31 alpha.
static float LookupIntensity(CompressionLabel *labels, const uint32 *pixels,
                             const uint32 idx) {
  CompressionLabel &entry = labels[idx];

  if(!entry.bCachedIntensity) {
    const uint32 packed = pixels[idx];

    // Every color channel is scaled by alpha before it is weighted.
    const float alpha = static_cast<float>((packed >> 24) & 0xFF) / 255.0f;
    const float red = alpha * static_cast<float>(packed & 0xFF) / 255.0f;
    const float green = alpha * static_cast<float>((packed >> 8) & 0xFF) / 255.0f;
    const float blue = alpha * static_cast<float>((packed >> 16) & 0xFF) / 255.0f;

    entry.intensity = red * 0.2126f + green * 0.7152f + blue * 0.0722f;
    entry.bCachedIntensity = true;
  }

  return entry.intensity;
}
|
||||
|
||||
static const Pixel &Lookup(const Image &img,
|
||||
int32 x, int32 y,
|
||||
uint32 width, uint32 height,
|
||||
const EWrapMode wrapMode) {
|
||||
int32 w = static_cast<int32>(width);
|
||||
int32 h = static_cast<int32>(height);
|
||||
enum EExtremaResult {
|
||||
eExtremaResult_Neither,
|
||||
eExtremaResult_LocalMin,
|
||||
eExtremaResult_LocalMax
|
||||
};
|
||||
|
||||
assert(w >= 0);
|
||||
assert(h >= 0);
|
||||
static EExtremaResult ComputeLocalExtrema(
|
||||
CompressionLabel *labels, const uint8 *inBuf,
|
||||
const uint32 x, const uint32 y, const uint32 width, const uint32 height) {
|
||||
|
||||
while(x >= w) {
|
||||
x = (wrapMode == eWrapMode_Wrap)? x - w : w - 1;
|
||||
assert(x < width);
|
||||
assert(y < height);
|
||||
|
||||
const uint32 *pixels = reinterpret_cast<const uint32 *>(inBuf);
|
||||
uint8 i0 = static_cast<uint8>(255.0f * LookupIntensity(labels, pixels, y*width + x) + 0.5f);
|
||||
|
||||
int32 ng = 0;
|
||||
int32 nl = 0;
|
||||
|
||||
const int32 kKernelSz = 3;
|
||||
const int32 kHalfKernelSz = kKernelSz >> 1;
|
||||
for(int32 j = -kHalfKernelSz; j <= kHalfKernelSz; j++)
|
||||
for(int32 i = -kHalfKernelSz; i <= kHalfKernelSz; i++) {
|
||||
|
||||
if(i == 0 && j == 0) continue;
|
||||
|
||||
int32 xx = (i + static_cast<int32>(x + width)) % width;
|
||||
int32 yy = (j + static_cast<int32>(y + height)) % height;
|
||||
|
||||
assert(xx >= 0 && xx < static_cast<int32>(width));
|
||||
assert(yy >= 0 && yy < static_cast<int32>(height));
|
||||
|
||||
uint32 idx = static_cast<uint32>(xx) + width * static_cast<uint32>(yy);
|
||||
uint8 ix = static_cast<uint8>(255.0f * LookupIntensity(labels, pixels, idx) + 0.5f);
|
||||
if(ix >= i0) {
|
||||
ng++;
|
||||
}
|
||||
|
||||
if(ix <= i0) {
|
||||
nl++;
|
||||
}
|
||||
}
|
||||
|
||||
while(x < 0) {
|
||||
x = (wrapMode == eWrapMode_Wrap)? x + w : 0;
|
||||
EExtremaResult result = eExtremaResult_Neither;
|
||||
if(ng == nl) {
|
||||
return result;
|
||||
}
|
||||
|
||||
while(y >= h) {
|
||||
y = (wrapMode == eWrapMode_Wrap)? y - h : h - 1;
|
||||
CompressionLabel &l = labels[y*width + x];
|
||||
const int32 kThreshold = kKernelSz * kKernelSz - 1;
|
||||
if(ng >= kThreshold) {
|
||||
l.lowLabel.distance = 1;
|
||||
l.lowLabel.AddIdx(y*width+x);
|
||||
result = eExtremaResult_LocalMin;
|
||||
} else if(nl >= kThreshold) {
|
||||
l.highLabel.distance = 1;
|
||||
l.highLabel.AddIdx(y*width+x);
|
||||
result = eExtremaResult_LocalMax;
|
||||
}
|
||||
|
||||
while(y < 0) {
|
||||
y = (wrapMode == eWrapMode_Wrap)? y + h : 0;
|
||||
}
|
||||
|
||||
return img(x, y);
|
||||
return result;
|
||||
}
|
||||
|
||||
void Compress(const CompressionJob &dcj,
|
||||
bool bTwoBitMode,
|
||||
const EWrapMode wrapMode) {
|
||||
Image img(dcj.height, dcj.width);
|
||||
uint32 nPixels = dcj.height * dcj.width;
|
||||
for(uint32 i = 0; i < nPixels; i++) {
|
||||
uint32 x = i % dcj.width;
|
||||
uint32 y = i / dcj.width;
|
||||
static void DilateLabelForward(Label &l, const Label &up, const Label &left) {
|
||||
|
||||
const uint32 *pixels = reinterpret_cast<const uint32 *>(dcj.inBuf);
|
||||
img(x, y).Unpack(pixels[i]);
|
||||
if(l.distance == 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
Image original = img;
|
||||
img.DebugOutput("Original");
|
||||
if(l.nLabels == 0) {
|
||||
l.nLabels = 0;
|
||||
}
|
||||
|
||||
// Downscale it using anisotropic diffusion based scheme in order to preserve
|
||||
// image features, then reupscale and compute deltas. Use deltas to generate
|
||||
// initial A & B images followed by modulation data.
|
||||
img.ContentAwareDownscale(1, 1, eWrapMode_Wrap, true);
|
||||
img.ContentAwareDownscale(1, 1, eWrapMode_Wrap, false);
|
||||
// Are we in no position to dilate?
|
||||
if(up.distance == 0 && left.distance == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
Image downscaled = img;
|
||||
// Have we visited up yet?
|
||||
if(up.distance == 0) {
|
||||
if(left.distance < 4) {
|
||||
l.distance = left.distance + 1;
|
||||
l.AddIdx(left.idxs[0]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Upscale it again
|
||||
img.BilinearUpscale(2, 2, eWrapMode_Wrap);
|
||||
// Have we visited left yet?
|
||||
if(left.distance == 0) {
|
||||
if(up.distance < 4) {
|
||||
l.distance = up.distance + 1;
|
||||
l.AddIdx(up.idxs[0]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
img.DebugOutput("Reconstruction");
|
||||
// Otherwise, if they're the same, then we're at a corner...
|
||||
if(left.distance == up.distance) {
|
||||
if(left.idxs[0] == up.idxs[0]) {
|
||||
l.distance = left.distance;
|
||||
l.AddIdx(left.idxs[0]);
|
||||
} else {
|
||||
if(up.distance < 4) {
|
||||
l.distance = up.distance + 1;
|
||||
l.AddIdx(up.idxs[0]);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute difference...
|
||||
::std::vector<int16> difference;
|
||||
difference.resize(dcj.height * dcj.width * 4);
|
||||
for(uint32 j = 0; j < dcj.height; j++) {
|
||||
for(uint32 i = 0; i < dcj.width; i++) {
|
||||
for(uint32 c = 0; c < 4; c++) {
|
||||
int16 o = original(i, j).Component(c);
|
||||
int16 n = img(i, j).Component(c);
|
||||
difference[j*dcj.width*4 + i*4 + c] = o - n;
|
||||
// Otherwise, we're at a disjoint part, so take the minimum and add
|
||||
// one to the distance and assume their index...
|
||||
if(left.distance < up.distance) {
|
||||
l.distance = left.distance + 1;
|
||||
l.AddIdx(left.idxs[0]);
|
||||
} else { // up.distance < left.distance
|
||||
l.distance = up.distance + 1;
|
||||
l.AddIdx(up.idxs[0]);
|
||||
}
|
||||
}
|
||||
|
||||
// Forward labeling pass: walks the image top-to-bottom, left-to-right,
// classifies each pixel as a local extremum (or neither) and dilates labels
// from the pixel above and the pixel to the left. Neighbor lookups wrap
// around the image edges, and the first three rows are visited a second time
// (j runs to h+3) so that labels can carry across the bottom-to-top seam.
static void LabelImageForward(CompressionLabel *labels,
                              const uint8 *inBuf,
                              const uint32 w, const uint32 h) {
  for(uint32 j = 0; j < h+3; j++) {
    for(uint32 i = 0; i < w; i++) {
      const uint32 row = j % h;
      const EExtremaResult kind = ComputeLocalExtrema(labels, inBuf, i, row, w, h);

      // Wrapped positions of the current pixel and its up/left neighbors.
      const uint32 aboveIdx = ((j+h-1) % h) * w + i;
      const uint32 beforeIdx = row * w + ((i+w-1) % w);

      CompressionLabel &here = labels[row*w + i];
      CompressionLabel &above = labels[aboveIdx];
      CompressionLabel &before = labels[beforeIdx];

      // A pixel that is itself a maximum keeps its own high label; likewise
      // for a minimum and its low label. The other label is still dilated.
      if(kind != eExtremaResult_LocalMax) {
        DilateLabelForward(here.highLabel, above.highLabel, before.highLabel);
      }

      if(kind != eExtremaResult_LocalMin) {
        DilateLabelForward(here.lowLabel, above.lowLabel, before.lowLabel);
      }
    }
  }
}
|
||||
|
||||
const uint32 blocksW = dcj.width / 4;
|
||||
const uint32 blocksH = dcj.height / 4;
|
||||
// Backward dilation step for a single label. Looks at the five neighbors
// supplied by the caller, finds the smallest non-zero distance among them and,
// if that gives this pixel a distance that is at least as good as the one it
// already has (and no larger than 4), merges the indices of every neighbor at
// that minimum distance into l.
//
// l           label to update (the high or the low label of the pixel)
// neighbors   the five neighboring CompressionLabels
// bHighLabel  selects which label of each neighbor is consulted
static void DilateLabelBackward(Label &l,
                                CompressionLabel *neighbors[5],
                                bool bHighLabel) {
  // Distance 1 marks the extremum itself; it never takes labels from others.
  if(l.distance == 1)
    return;

  const Label *nbs[5];
  for(uint32 i = 0; i < 5; i++) {
    nbs[i] = bHighLabel? &(neighbors[i]->highLabel) : &(neighbors[i]->lowLabel);
  }

  // Figure out which labels are closest. Starting at 5 means "no labeled
  // neighbor", which is rejected by the newDist > 4 test below.
  uint8 minDist = 5;
  for(uint32 i = 0; i < 5; i++) {
    if(nbs[i]->distance > 0)
      minDist = ::std::min(nbs[i]->distance, minDist);
  }

  assert(minDist > 0);

  uint8 newDist = minDist + 1;
  if((l.distance != 0 && l.distance < newDist) || newDist > 4) {
    return;
  }

  // A strictly better distance replaces the old indices; an equal distance
  // adds to them.
  if(l.distance != newDist) {
    l.nLabels = 0;
  }

  for(uint32 i = 0; i < 5; i++) {
    if(nbs[i]->distance == minDist) {
      l.Combine(*nbs[i]);
    }
  }
  l.distance = newDist;
}
|
||||
|
||||
// Backward labeling pass: walks the image bottom-to-top, right-to-left
// (starting three rows past the end, which wrap back onto the first rows) and
// dilates each pixel's high and low labels from five wrapped neighbors:
// top-right, right, bottom-right, bottom and bottom-left.
static void LabelImageBackward(CompressionLabel *labels,
                               const uint32 w, const uint32 h) {
  CompressionLabel *neighbors[5] = { 0 };
  for(int32 j = static_cast<int32>(h)+2; j >= 0; j--) {
    for(int32 i = static_cast<int32>(w)-1; i >= 0; i--) {
      // Both counters are non-negative here, so the unsigned copies hold the
      // same values the mixed signed/unsigned arithmetic would produce.
      const uint32 col = static_cast<uint32>(i);
      const uint32 line = static_cast<uint32>(j);

      // Row offsets and columns, wrapped around the image edges.
      const uint32 rowHere = (line % h) * w;
      const uint32 rowAbove = ((line + h - 1) % h) * w;
      const uint32 rowBelow = ((line + 1) % h) * w;
      const uint32 colRight = (col + 1) % w;
      const uint32 colLeft = (col + w - 1) % w;

      neighbors[0] = &labels[rowAbove + colRight];  // top right
      neighbors[1] = &labels[rowHere + colRight];   // right
      neighbors[2] = &labels[rowBelow + colRight];  // bottom right
      neighbors[3] = &labels[rowBelow + col];       // bottom
      neighbors[4] = &labels[rowBelow + colLeft];   // bottom left

      CompressionLabel &current = labels[rowHere + col];
      DilateLabelBackward(current.highLabel, neighbors, true);
      DilateLabelBackward(current.lowLabel, neighbors, false);
    }
  }
}
|
||||
|
||||
// Returns the average of the pixels referenced by label, each weighted by the
// number of times its index was recorded. The caller is expected to pass a
// label with at least one index; the sum is divided by the total count
// without a zero check.
static FasTC::Color CollectLabel(const uint32 *pixels, const Label &label) {
  FasTC::Color accum;
  uint32 totalWeight = 0;

  for(uint32 k = 0; k < label.nLabels; k++) {
    const uint8 weight = label.times[k];

    FasTC::Color sample;
    sample.Unpack(pixels[label.idxs[k]]);

    accum += sample * weight;
    totalWeight += weight;
  }

  accum /= totalWeight;
  return accum;
}
|
||||
|
||||
static void GenerateLowHighImages(CompressionLabel *labels,
|
||||
const uint8 *inBuf, uint8 *outBuf,
|
||||
const uint32 w, const uint32 h) {
|
||||
assert((w % 4) == 0);
|
||||
assert((h % 4) == 0);
|
||||
|
||||
uint32 blocksW = w / 4;
|
||||
uint32 blocksH = h / 4;
|
||||
|
||||
FasTC::Color blockColors[2][16];
|
||||
const uint32 *pixels = reinterpret_cast<const uint32 *>(inBuf);
|
||||
|
||||
Image imgA = downscaled;
|
||||
Image imgB = downscaled;
|
||||
for(uint32 j = 0; j < blocksH; j++) {
|
||||
for(uint32 i = 0; i < blocksW; i++) {
|
||||
int32 startX = i*4 + 2 - (kKernelSz / 2);
|
||||
int32 startY = j*4 + 2 - (kKernelSz / 2);
|
||||
for(int32 y = startY; y < startY + kKernelSz; y++) {
|
||||
for(int32 x = startX; x < startX + kKernelSz; x++) {
|
||||
const Pixel &po = Lookup(original, x, y, dcj.width, dcj.height, wrapMode);
|
||||
Pixel &pa = imgA(i, j);
|
||||
Pixel &pb = imgB(i, j);
|
||||
for(uint32 c = 0; c < 4; c++) {
|
||||
pa.Component(c) = ::std::max(po.Component(c), pa.Component(c));
|
||||
pb.Component(c) = ::std::min(po.Component(c), pb.Component(c));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
imgA.DebugOutput("ImageA");
|
||||
imgB.DebugOutput("ImageB");
|
||||
float minIntensity = 1.1, maxIntensity = -0.1;
|
||||
uint32 minIntensityIdx = 0, maxIntensityIdx = 0;
|
||||
for(uint32 y = j*4; y <= (j+1)*4; y++)
|
||||
for(uint32 x = i*4; x <= (i+1)*4; x++) {
|
||||
|
||||
// Determine modulation values...
|
||||
Image upA = imgA;
|
||||
Image upB = imgB;
|
||||
|
||||
upA.BilinearUpscale(2, 2, wrapMode);
|
||||
upB.BilinearUpscale(2, 2, wrapMode);
|
||||
|
||||
assert(upA.GetHeight() == dcj.height && upA.GetWidth() == dcj.width);
|
||||
assert(upB.GetHeight() == dcj.height && upB.GetWidth() == dcj.width);
|
||||
|
||||
upA.DebugOutput("UpscaledA");
|
||||
upB.DebugOutput("UpscaledB");
|
||||
|
||||
// Choose the most appropriate modulation values for the two images...
|
||||
::std::vector<uint8> modValues;
|
||||
modValues.resize(dcj.width * dcj.height);
|
||||
for(uint32 j = 0; j < dcj.height; j++) {
|
||||
for(uint32 i = 0; i < dcj.width; i++) {
|
||||
uint8 &mv = modValues[j * dcj.width + i];
|
||||
|
||||
const Pixel pa = upA(i, j);
|
||||
const Pixel pb = upB(i, j);
|
||||
const Pixel po = original(i, j);
|
||||
|
||||
// !FIXME! there are two modulation modes... we're only using one.
|
||||
uint8 modSteps[4] = { 8, 5, 3, 0 };
|
||||
uint8 bestMod = 0;
|
||||
uint32 bestError = 0xFFFFFFFF;
|
||||
for(uint32 s = 0; s < 4; s++) {
|
||||
uint32 error = 0;
|
||||
for(uint32 c = 0; c < 4; c++) {
|
||||
uint16 va = static_cast<uint16>(pa.Component(c));
|
||||
uint16 vb = static_cast<uint16>(pb.Component(c));
|
||||
uint16 vo = static_cast<uint16>(po.Component(c));
|
||||
|
||||
uint16 lerpVal = modSteps[s];
|
||||
uint16 res = (va * (8 - lerpVal) + vb * lerpVal) / 8;
|
||||
uint16 e = (res > vo)? res - vo : vo - res;
|
||||
error += e * e;
|
||||
uint32 idx = (y%h)*w + (x%w);
|
||||
float intensity = labels[idx].intensity;
|
||||
if(intensity < minIntensity) {
|
||||
minIntensity = intensity;
|
||||
minIntensityIdx = idx;
|
||||
}
|
||||
|
||||
if(error < bestError) {
|
||||
bestError = error;
|
||||
bestMod = s;
|
||||
if(intensity > maxIntensity) {
|
||||
maxIntensity = intensity;
|
||||
maxIntensityIdx = idx;
|
||||
}
|
||||
|
||||
if(x == ((i + 1) * 4) || y == ((j + 1) * 4))
|
||||
continue;
|
||||
|
||||
uint32 localIdx = (y-(j*4))*4 + x-(i*4);
|
||||
assert(localIdx < 16);
|
||||
|
||||
if(labels[idx].highLabel.distance > 0) {
|
||||
blockColors[0][localIdx] = CollectLabel(pixels, labels[idx].highLabel);
|
||||
} else {
|
||||
// Mark the color as unused
|
||||
blockColors[0][localIdx].A() = -1.0f;
|
||||
}
|
||||
|
||||
if(labels[idx].lowLabel.distance > 0) {
|
||||
blockColors[1][localIdx] = CollectLabel(pixels, labels[idx].lowLabel);
|
||||
} else {
|
||||
// Mark the color as unused
|
||||
blockColors[1][localIdx].A() = -1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
mv = bestMod;
|
||||
}
|
||||
}
|
||||
// Average all of the values together now...
|
||||
FasTC::Color high, low;
|
||||
for(uint32 y = 0; y < 4; y++)
|
||||
for(uint32 x = 0; x < 4; x++) {
|
||||
uint32 idx = y * 4 + x;
|
||||
FasTC::Color c = blockColors[0][idx];
|
||||
if(c.A() < 0.0f) {
|
||||
c.Unpack(pixels[maxIntensityIdx]);
|
||||
}
|
||||
high += c * (1.0f / 16.0f);
|
||||
|
||||
Image modulationImg(dcj.height, dcj.width);
|
||||
for(uint32 j = 0; j < dcj.height; j++) {
|
||||
for(uint32 i = 0; i < dcj.width; i++) {
|
||||
uint32 idx = j * dcj.width + i;
|
||||
uint8 modVal = static_cast<uint8>((static_cast<float>(modValues[idx]) / 3.0f) * 255.0f);
|
||||
|
||||
Pixel p;
|
||||
for(uint32 c = 1; c < 4; c++) {
|
||||
p.Component(c) = modVal;
|
||||
c = blockColors[1][idx];
|
||||
if(c.A() < 0.0f) {
|
||||
c.Unpack(pixels[minIntensityIdx]);
|
||||
}
|
||||
low += c * (1.0f / 16.0f);
|
||||
}
|
||||
p.A() = 255;
|
||||
modulationImg(i, j) = p;
|
||||
}
|
||||
}
|
||||
modulationImg.DebugOutput("Modulation");
|
||||
|
||||
// Pack everything into a PVRTC blocks.
|
||||
assert(imgA.GetHeight() == blocksH);
|
||||
assert(imgA.GetWidth() == blocksW);
|
||||
|
||||
std::vector<uint64> blocks;
|
||||
blocks.reserve(blocksW * blocksH);
|
||||
for(uint32 j = 0; j < blocksH; j++) {
|
||||
for(uint32 i = 0; i < blocksW; i++) {
|
||||
// Store them as our endpoints for this block...
|
||||
Block b;
|
||||
b.SetColorA(imgA(i, j), true);
|
||||
b.SetColorB(imgB(i, j), true);
|
||||
for(uint32 t = 0; t < 16; t++) {
|
||||
uint32 x = i*4 + (t%4);
|
||||
uint32 y = j*4 + (t/4);
|
||||
b.SetLerpValue(t, modValues[y*dcj.width + x]);
|
||||
}
|
||||
blocks.push_back(b.Pack());
|
||||
}
|
||||
}
|
||||
FasTC::Pixel p;
|
||||
p.Unpack(high.Pack());
|
||||
b.SetColorA(p);
|
||||
|
||||
// Spit out the blocks...
|
||||
for(uint32 j = 0; j < blocksH; j++) {
|
||||
for(uint32 i = 0; i < blocksW; i++) {
|
||||
p.Unpack(low.Pack());
|
||||
b.SetColorB(p);
|
||||
|
||||
// The blocks are initially arranged in morton order. Let's
|
||||
// linearize them...
|
||||
uint32 idx = Interleave(j, i);
|
||||
|
||||
uint64 *outPtr = reinterpret_cast<uint64 *>(dcj.outBuf);
|
||||
outPtr[idx] = blocks[j*blocksW + i];
|
||||
uint64 *outBlocks = reinterpret_cast<uint64 *>(outBuf);
|
||||
outBlocks[j * blocksW + i] = b.Pack();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bilinearly blends four corner pixels at offset (x, y) inside a 4x4 cell,
// using integer weights that sum to 16.
//
// x, y      offset inside the cell; the weights are x, 4-x, y and 4-y
// p         supplies the per-channel base value added to the final result
// fp        output: receives the low four bits of each channel of the
//           weighted sum
// topLeft, topRight, bottomLeft, bottomRight
//           the corner pixels being blended
//
// Returns the weighted sum divided by 16, converted to 8 bits per channel and
// then adjusted per channel as computed in the final loop.
static FasTC::Pixel BilerpPixels(uint32 x, uint32 y,
                                 const FasTC::Pixel &p, FasTC::Pixel &fp,
                                 const FasTC::Pixel &topLeft,
                                 const FasTC::Pixel &topRight,
                                 const FasTC::Pixel &bottomLeft,
                                 const FasTC::Pixel &bottomRight) {

  const uint32 rightW = x;
  const uint32 leftW = 4 - x;
  const uint32 bottomW = y;
  const uint32 topW = 4 - y;

  // Corner weights.
  const uint32 wTL = leftW * topW;
  const uint32 wTR = rightW * topW;
  const uint32 wBL = leftW * bottomW;
  const uint32 wBR = rightW * bottomW;

  // Weighted sum of the four corners, all channels at once.
  const FasTC::Pixel sum =
    topLeft * wTL + topRight * wTR + bottomLeft * wBL + bottomRight * wBR;

  // The low four bits are what the division by 16 below throws away.
  for(uint32 c = 0; c < 4; c++) {
    fp.Component(c) = sum.Component(c) & 15;
  }

  FasTC::Pixel tmp(p);
  tmp = sum / (16);

  const uint8 fullDepth[4] = { 8, 8, 8, 8 };
  tmp.ChangeBitDepth(fullDepth);

  const uint8 currentDepth[4] = { 4, 5, 5, 5 };
  const uint8 fractionDepth[4] = { 4, 4, 4, 4 };

  for(uint32 c = 0; c < 4; c++) {
    const uint32 denominator = (1 << currentDepth[c]);
    const uint32 numerator = denominator + 1;

    const uint32 shift = fractionDepth[c] - (fullDepth[c] - currentDepth[c]);
    const uint32 fractionBits = tmp.Component(c) >> shift;

    const uint32 base = p.Component(c);
    tmp.Component(c) = base + ((fractionBits * numerator) / denominator);
  }

  return tmp;
}
|
||||
|
||||
// Converts p in place to 4 bits of alpha and 5 bits for each color channel.
// If the pixel had any alpha bits before the conversion, the lowest bit of
// the converted alpha is cleared.
static void ChangePixelTo4555(FasTC::Pixel &p) {
  // Remember the depths the pixel came in with; only alpha's is consulted.
  uint8 depthsBefore[4];
  p.GetBitDepth(depthsBefore);

  const uint8 kTargetDepths[4] = { 4, 5, 5, 5 };
  p.ChangeBitDepth(kTargetDepths);

  if(depthsBefore[0] == 0) {
    return;
  }

  p.A() = p.A() & 0xFE;
}
|
||||
|
||||
// Chooses a modulation (lerp) value for every pixel and stores it in the
// blocks already present in outBuf.
//
// For each block (i, j) this handles the 4x4 pixel cell that starts two
// pixels into the block and is therefore shared with the right, bottom and
// bottom-right neighbors (wrapping at the image edges). The A and B colors of
// those four blocks are bilinearly interpolated at each pixel of the cell,
// and the modulation step whose blend is closest to the source pixel wins.
//
// outBuf  one 64-bit block per 4x4 pixels, row-major, blocksW blocks per row,
//         with the A/B colors already written
// inBuf   source image, one 32-bit packed pixel per texel, w pixels per row
// w, h    image size in pixels
static void GenerateModulationValues(uint8 *outBuf, const uint8 *inBuf, uint32 w, uint32 h) {
  // Start from the beginning block and generate the lerp values for the intermediate values
  uint64 *outBlocks = reinterpret_cast<uint64 *>(outBuf);
  const uint32 blocksW = w >> 2;
  const uint32 blocksH = h >> 2;

  const uint32 *pixels = reinterpret_cast<const uint32 *>(inBuf);

  // Make sure the bit depth matches the original...
  FasTC::Pixel p;
  uint8 bitDepth[4] = { 4, 5, 5, 5 };
  p.ChangeBitDepth(bitDepth);

  // Save fractional bits
  FasTC::Pixel fp;
  uint8 fpDepths[4] = { 4, 4, 4, 4 };
  fp.ChangeBitDepth(fpDepths);

  // !FIXME! there are two modulation modes... we're only using one.
  const uint8 modSteps[4] = { 8, 5, 3, 0 };

  for(uint32 j = 0; j < blocksH; j++) {
    for(uint32 i = 0; i < blocksW; i++) {

      // Neighbors are blocks, so they wrap at the block-grid size; wrapping
      // at the pixel size would step outside the block array on the last
      // row and column.
      const uint32 lowXIdx = i;
      const uint32 highXIdx = (i + 1) % blocksW;
      const uint32 lowYIdx = j;
      const uint32 highYIdx = (j + 1) % blocksH;

      const uint32 topLeftBlockIdx = lowYIdx * blocksW + lowXIdx;
      const uint32 topRightBlockIdx = lowYIdx * blocksW + highXIdx;
      const uint32 bottomLeftBlockIdx = highYIdx * blocksW + lowXIdx;
      const uint32 bottomRightBlockIdx = highYIdx * blocksW + highXIdx;

      Block topLeftBlock(reinterpret_cast<uint8 *>(outBlocks + topLeftBlockIdx));
      Block topRightBlock(reinterpret_cast<uint8 *>(outBlocks + topRightBlockIdx));
      Block bottomLeftBlock(reinterpret_cast<uint8 *>(outBlocks + bottomLeftBlockIdx));
      Block bottomRightBlock(reinterpret_cast<uint8 *>(outBlocks + bottomRightBlockIdx));

      FasTC::Pixel topLeftA (topLeftBlock.GetColorA());
      FasTC::Pixel topLeftB (topLeftBlock.GetColorB());

      FasTC::Pixel topRightA (topRightBlock.GetColorA());
      FasTC::Pixel topRightB (topRightBlock.GetColorB());

      FasTC::Pixel bottomLeftA (bottomLeftBlock.GetColorA());
      FasTC::Pixel bottomLeftB (bottomLeftBlock.GetColorB());

      // The fourth corner must come from the bottom-right block itself.
      FasTC::Pixel bottomRightA (bottomRightBlock.GetColorA());
      FasTC::Pixel bottomRightB (bottomRightBlock.GetColorB());

      ChangePixelTo4555(topLeftA);
      ChangePixelTo4555(topLeftB);

      ChangePixelTo4555(topRightA);
      ChangePixelTo4555(topRightB);

      ChangePixelTo4555(bottomLeftA);
      ChangePixelTo4555(bottomLeftB);

      ChangePixelTo4555(bottomRightA);
      ChangePixelTo4555(bottomRightB);

      for(uint32 x = 0; x < 4; x++) {
        for(uint32 y = 0; y < 4; y++) {
          // Block (i, j) starts at pixel (4*i, 4*j); the interpolation cell
          // starts two pixels further in and wraps at the image edges.
          const uint32 pixelX = (i * 4 + 2 + x) % w;
          const uint32 pixelY = (j * 4 + 2 + y) % h;
          FasTC::Pixel colorA = BilerpPixels(x, y, p, fp, topLeftA, topRightA, bottomLeftA, bottomRightA);
          FasTC::Pixel colorB = BilerpPixels(x, y, p, fp, topLeftB, topRightB, bottomLeftB, bottomRightB);
          FasTC::Pixel original(pixels[pixelY * w + pixelX]);

          // Try every modulation step and keep the one with the smallest
          // squared error against the source pixel.
          uint8 bestMod = 0;
          uint32 bestError = 0xFFFFFFFF;
          for(uint32 s = 0; s < 4; s++) {
            uint16 lerpVal = modSteps[s];
            FasTC::Pixel result = (colorA * (8 - lerpVal) + colorB * lerpVal) / 8;

            FasTC::Vector4<int32> errorVec;
            for(uint32 c = 0; c < 4; c++) {
              int32 r = result.Component(c);
              int32 o = original.Component(c);
              errorVec[c] = r - o;
            }
            uint32 error = static_cast<uint32>(errorVec.LengthSq());

            if(error < bestError) {
              bestError = error;
              bestMod = s;
            }
          }

          // The first two columns/rows of the cell still belong to the
          // top/left block; the last two fall into the next block over.
          Block *pixelBlock = &topLeftBlock;
          uint32 pixelBlockIdx = topLeftBlockIdx;
          if(x > 1) {
            if(y > 1) {
              pixelBlock = &bottomRightBlock;
              pixelBlockIdx = bottomRightBlockIdx;
            } else {
              pixelBlock = &topRightBlock;
              pixelBlockIdx = topRightBlockIdx;
            }
          } else if(y > 1) {
            pixelBlock = &bottomLeftBlock;
            pixelBlockIdx = bottomLeftBlockIdx;
          }

          pixelBlock->SetLerpValue((pixelY % 4) * 4 + (pixelX % 4), bestMod);
          outBlocks[pixelBlockIdx] = outBlocks[pixelBlockIdx] | pixelBlock->Pack();
        }
      }
    }
  }
}
|
||||
|
||||
void Compress(const CompressionJob &cj, bool bTwoBit, EWrapMode wrapMode) {
|
||||
const uint32 width = cj.width;
|
||||
const uint32 height = cj.height;
|
||||
|
||||
memset(cj.outBuf, 0, (width * height / 16) * kBlockSize);
|
||||
|
||||
CompressionLabel *labels =
|
||||
(CompressionLabel *)calloc(width * height, sizeof(CompressionLabel));
|
||||
|
||||
// First traverse forward...
|
||||
LabelImageForward(labels, cj.inBuf, width, height);
|
||||
|
||||
#ifndef NDEBUG
|
||||
Image highForwardLabels(width, height);
|
||||
Image lowForwardLabels(width, height);
|
||||
|
||||
const FasTC::Color kLabelPalette[4] = {
|
||||
FasTC::Color(0.0, 0.0, 1.0, 1.0),
|
||||
FasTC::Color(1.0, 0.0, 1.0, 1.0),
|
||||
FasTC::Color(1.0, 0.0, 0.0, 1.0),
|
||||
FasTC::Color(1.0, 1.0, 0.0, 1.0)
|
||||
};
|
||||
|
||||
for(uint32 j = 0; j < height; j++) {
|
||||
for(uint32 i = 0; i < width; i++) {
|
||||
const CompressionLabel &l = labels[j*width + i];
|
||||
|
||||
const Label &hl = l.highLabel;
|
||||
if(hl.distance > 0) {
|
||||
highForwardLabels(i, j).Unpack(kLabelPalette[hl.distance-1].Pack());
|
||||
}
|
||||
|
||||
const Label &ll = l.lowLabel;
|
||||
if(ll.distance > 0) {
|
||||
lowForwardLabels(i, j).Unpack(kLabelPalette[ll.distance-1].Pack());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
highForwardLabels.DebugOutput("HighForwardLabels");
|
||||
lowForwardLabels.DebugOutput("LowForwardLabels");
|
||||
|
||||
Image highForwardImg(width, height);
|
||||
Image lowForwardImg(width, height);
|
||||
const uint32 *pixels = reinterpret_cast<const uint32 *>(cj.inBuf);
|
||||
for(uint32 j = 0; j < height; j++) {
|
||||
for(uint32 i = 0; i < width; i++) {
|
||||
const CompressionLabel &l = labels[j*width + i];
|
||||
|
||||
const Label &hl = l.highLabel;
|
||||
if(hl.distance > 0) {
|
||||
FasTC::Color c;
|
||||
uint32 nPs = 0;
|
||||
for(uint32 p = 0; p < hl.nLabels; p++) {
|
||||
FasTC::Color pc; pc.Unpack(pixels[hl.idxs[p]]);
|
||||
c += pc * static_cast<float>(hl.times[p]);
|
||||
nPs += hl.times[p];
|
||||
}
|
||||
c /= nPs;
|
||||
highForwardImg(i, j).Unpack(c.Pack());
|
||||
}
|
||||
|
||||
const Label &ll = l.lowLabel;
|
||||
if(ll.distance > 0) {
|
||||
FasTC::Color c;
|
||||
uint32 nPs = 0;
|
||||
for(uint32 p = 0; p < ll.nLabels; p++) {
|
||||
FasTC::Color pc; pc.Unpack(pixels[ll.idxs[p]]);
|
||||
c += pc * static_cast<float>(ll.times[p]);
|
||||
nPs += ll.times[p];
|
||||
}
|
||||
c /= nPs;
|
||||
lowForwardImg(i, j).Unpack(c.Pack());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
highForwardImg.DebugOutput("HighForwardImg");
|
||||
lowForwardImg.DebugOutput("LowForwardImg");
|
||||
|
||||
std::cout << "Output Forward images." << std::endl;
|
||||
#endif
|
||||
|
||||
// Then traverse backward...
|
||||
LabelImageBackward(labels, width, height);
|
||||
|
||||
#ifndef NDEBUG
|
||||
Image highImg(width, height);
|
||||
Image lowImg(width, height);
|
||||
for(uint32 j = 0; j < height; j++) {
|
||||
for(uint32 i = 0; i < width; i++) {
|
||||
const CompressionLabel &l = labels[j*width + i];
|
||||
|
||||
const Label &hl = l.highLabel;
|
||||
if(hl.distance > 0) {
|
||||
FasTC::Color c;
|
||||
for(uint32 p = 0; p < hl.nLabels; p++) {
|
||||
FasTC::Color pc; pc.Unpack(pixels[hl.idxs[p]]);
|
||||
c += pc;
|
||||
}
|
||||
c /= hl.nLabels;
|
||||
highImg(i, j).Unpack(c.Pack());
|
||||
}
|
||||
|
||||
const Label &ll = l.lowLabel;
|
||||
if(ll.distance > 0) {
|
||||
FasTC::Color c;
|
||||
for(uint32 p = 0; p < ll.nLabels; p++) {
|
||||
FasTC::Color pc; pc.Unpack(pixels[ll.idxs[p]]);
|
||||
c += pc;
|
||||
}
|
||||
c /= ll.nLabels;
|
||||
lowImg(i, j).Unpack(c.Pack());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
highImg.DebugOutput("HighImg");
|
||||
lowImg.DebugOutput("LowImg");
|
||||
|
||||
std::cout << "Output images." << std::endl;
|
||||
#endif
|
||||
|
||||
// Then combine everything...
|
||||
GenerateLowHighImages(labels, cj.inBuf, cj.outBuf, width, height);
|
||||
|
||||
// Then compute modulation values
|
||||
GenerateModulationValues(cj.outBuf, cj.inBuf, width, height);
|
||||
|
||||
// Cleanup
|
||||
free(labels);
|
||||
}
|
||||
} // namespace PVRTCC
|
||||
|
|
Loading…
Reference in a new issue