diff --git a/BPTCEncoder/src/RGBAEndpoints.cpp b/BPTCEncoder/src/RGBAEndpoints.cpp index e21abbb..1cbb722 100755 --- a/BPTCEncoder/src/RGBAEndpoints.cpp +++ b/BPTCEncoder/src/RGBAEndpoints.cpp @@ -215,6 +215,11 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const { // /////////////////////////////////////////////////////////////////////////////// +template +static inline T Clamp(const T &x, const T &a, const T &b) { + return std::max(a, std::min(x, b)); +} + template double RGBACluster::QuantizedError( const RGBAVector &p1, const RGBAVector &p2, @@ -240,20 +245,68 @@ double RGBACluster::QuantizedError( qp2 = p2.ToPixel(bitMask); } + const RGBAVector uqp1 = RGBAVector(0, qp1); + const RGBAVector uqp2 = RGBAVector(0, qp2); + const float uqplsq = (uqp1 - uqp2).LengthSq(); + const RGBAVector uqpdir = uqp2 - uqp1; + const uint8 *pqp1 = reinterpret_cast(&qp1); const uint8 *pqp2 = reinterpret_cast(&qp2); const RGBAVector metric = errorMetricVec; float totalError = 0.0; + if(uqplsq == 0) { + + // If both endpoints are the same then the indices don't matter... + for(uint32 i = 0; i < GetNumPoints(); i++) { + + const uint32 pixel = GetPixel(i); + const uint8 *pb = (const uint8 *)(&pixel); + + uint32 interp0 = (*interpVals)[0][0]; + uint32 interp1 = (*interpVals)[0][1]; + + RGBAVector errorVec (0.0f); + for(uint32 k = 0; k < 4; k++) { + const uint32 ip = (((pqp1[k] * interp0) + (pqp2[k] * interp1) + 32) >> 6) & 0xFF; + const uint8 dist = sad(pb[k], ip); + errorVec[k] = static_cast(dist) * metric[k]; + } + + totalError += errorVec * errorVec; + + if(indices) + indices[i] = 0; + } + + return totalError; + } + for(uint32 i = 0; i < GetNumPoints(); i++) { + // Project this point unto the direction denoted by uqpdir... + const RGBAVector pt = GetPoint(i); +#if 0 + const float pct = Clamp(((pt - uqp1) * uqpdir) / uqplsq, 0.0f, 1.0f); + const int32 j1 = static_cast(pct * static_cast(nBuckets-1)); + const int32 j2 = static_cast(pct * static_cast(nBuckets-1) + 0.7); +#else + const float pct = ((pt - uqp1) * uqpdir) / uqplsq; + int32 j1 = floor(pct * static_cast(nBuckets-1)); + int32 j2 = ceil(pct * static_cast(nBuckets-1)); + j1 = std::max(0, j1); + j2 = std::min(j2, nBuckets - 1); +#endif + + assert(j1 >= 0 && j2 <= nBuckets - 1); + const uint32 pixel = GetPixel(i); const uint8 *pb = (const uint8 *)(&pixel); float minError = FLT_MAX; uint8 bestBucket = 0; - for(int j = 0; j < nBuckets; j++) { + for(int32 j = j1; j <= j2; j++) { uint32 interp0 = (*interpVals)[j][0]; uint32 interp1 = (*interpVals)[j][1];