mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-07 01:55:37 +00:00
More perf fixes
This commit is contained in:
parent
1bfda210c0
commit
2141d71dd0
|
@ -215,6 +215,11 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
|
|||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<typename T>
|
||||
static inline T Clamp(const T &x, const T &a, const T &b) {
|
||||
return std::max(a, std::min(x, b));
|
||||
}
|
||||
|
||||
template<const uint8 nBuckets>
|
||||
double RGBACluster::QuantizedError(
|
||||
const RGBAVector &p1, const RGBAVector &p2,
|
||||
|
@ -240,20 +245,68 @@ double RGBACluster::QuantizedError(
|
|||
qp2 = p2.ToPixel(bitMask);
|
||||
}
|
||||
|
||||
const RGBAVector uqp1 = RGBAVector(0, qp1);
|
||||
const RGBAVector uqp2 = RGBAVector(0, qp2);
|
||||
const float uqplsq = (uqp1 - uqp2).LengthSq();
|
||||
const RGBAVector uqpdir = uqp2 - uqp1;
|
||||
|
||||
const uint8 *pqp1 = reinterpret_cast<const uint8 *>(&qp1);
|
||||
const uint8 *pqp2 = reinterpret_cast<const uint8 *>(&qp2);
|
||||
|
||||
const RGBAVector metric = errorMetricVec;
|
||||
|
||||
float totalError = 0.0;
|
||||
if(uqplsq == 0) {
|
||||
|
||||
// If both endpoints are the same then the indices don't matter...
|
||||
for(uint32 i = 0; i < GetNumPoints(); i++) {
|
||||
|
||||
const uint32 pixel = GetPixel(i);
|
||||
const uint8 *pb = (const uint8 *)(&pixel);
|
||||
|
||||
uint32 interp0 = (*interpVals)[0][0];
|
||||
uint32 interp1 = (*interpVals)[0][1];
|
||||
|
||||
RGBAVector errorVec (0.0f);
|
||||
for(uint32 k = 0; k < 4; k++) {
|
||||
const uint32 ip = (((pqp1[k] * interp0) + (pqp2[k] * interp1) + 32) >> 6) & 0xFF;
|
||||
const uint8 dist = sad<uint8>(pb[k], ip);
|
||||
errorVec[k] = static_cast<float>(dist) * metric[k];
|
||||
}
|
||||
|
||||
totalError += errorVec * errorVec;
|
||||
|
||||
if(indices)
|
||||
indices[i] = 0;
|
||||
}
|
||||
|
||||
return totalError;
|
||||
}
|
||||
|
||||
for(uint32 i = 0; i < GetNumPoints(); i++) {
|
||||
|
||||
// Project this point unto the direction denoted by uqpdir...
|
||||
const RGBAVector pt = GetPoint(i);
|
||||
#if 0
|
||||
const float pct = Clamp(((pt - uqp1) * uqpdir) / uqplsq, 0.0f, 1.0f);
|
||||
const int32 j1 = static_cast<int32>(pct * static_cast<float>(nBuckets-1));
|
||||
const int32 j2 = static_cast<int32>(pct * static_cast<float>(nBuckets-1) + 0.7);
|
||||
#else
|
||||
const float pct = ((pt - uqp1) * uqpdir) / uqplsq;
|
||||
int32 j1 = floor(pct * static_cast<float>(nBuckets-1));
|
||||
int32 j2 = ceil(pct * static_cast<float>(nBuckets-1));
|
||||
j1 = std::max(0, j1);
|
||||
j2 = std::min(j2, nBuckets - 1);
|
||||
#endif
|
||||
|
||||
assert(j1 >= 0 && j2 <= nBuckets - 1);
|
||||
|
||||
const uint32 pixel = GetPixel(i);
|
||||
const uint8 *pb = (const uint8 *)(&pixel);
|
||||
|
||||
float minError = FLT_MAX;
|
||||
uint8 bestBucket = 0;
|
||||
for(int j = 0; j < nBuckets; j++) {
|
||||
for(int32 j = j1; j <= j2; j++) {
|
||||
|
||||
uint32 interp0 = (*interpVals)[j][0];
|
||||
uint32 interp1 = (*interpVals)[j][1];
|
||||
|
|
Loading…
Reference in a new issue