More perf fixes

2025-08-03 23:01:13 +00:00 · 2014-03-26 18:11:12 -04:00 · 2014-03-26 18:11:12 -04:00 · 2141d71dd0
parent 1bfda210c0
commit 2141d71dd0
1 changed files with 54 additions and 1 deletions
--- a/BPTCEncoder/src/RGBAEndpoints.cpp
+++ b/BPTCEncoder/src/RGBAEndpoints.cpp
@ -215,6 +215,11 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
 //
 ///////////////////////////////////////////////////////////////////////////////

+template<typename T>
+static inline T Clamp(const T &x, const T &a, const T &b) {
+  return std::max(a, std::min(x, b));
+}
+
 template<const uint8 nBuckets>
 double RGBACluster::QuantizedError(
  const RGBAVector &p1, const RGBAVector &p2,
@ -240,20 +245,68 @@ double RGBACluster::QuantizedError(
    qp2 = p2.ToPixel(bitMask);
  }

+  const RGBAVector uqp1 = RGBAVector(0, qp1);
+  const RGBAVector uqp2 = RGBAVector(0, qp2);
+  const float uqplsq = (uqp1 - uqp2).LengthSq();
+  const RGBAVector uqpdir = uqp2 - uqp1;
+
  const uint8 *pqp1 = reinterpret_cast<const uint8 *>(&qp1);
  const uint8 *pqp2 = reinterpret_cast<const uint8 *>(&qp2);

  const RGBAVector metric = errorMetricVec;

  float totalError = 0.0;
+  if(uqplsq == 0) {
+
+    // If both endpoints are the same then the indices don't matter...
+    for(uint32 i = 0; i < GetNumPoints(); i++) {
+
+      const uint32 pixel = GetPixel(i);
+      const uint8 *pb = (const uint8 *)(&pixel);
+
+      uint32 interp0 = (*interpVals)[0][0];
+      uint32 interp1 = (*interpVals)[0][1];
+
+      RGBAVector errorVec (0.0f);
+      for(uint32 k = 0; k < 4; k++) {
+        const uint32 ip = (((pqp1[k] * interp0) + (pqp2[k] * interp1) + 32) >> 6) & 0xFF;
+        const uint8 dist = sad<uint8>(pb[k], ip);
+        errorVec[k] = static_cast<float>(dist) * metric[k];
+      }
+
+      totalError += errorVec * errorVec;
+
+      if(indices)
+        indices[i] = 0;
+    }
+
+    return totalError;
+  }
+
  for(uint32 i = 0; i < GetNumPoints(); i++) {

+    // Project this point unto the direction denoted by uqpdir...
+    const RGBAVector pt = GetPoint(i);
+#if 0
+    const float pct = Clamp(((pt - uqp1) * uqpdir) / uqplsq, 0.0f, 1.0f);
+    const int32 j1 = static_cast<int32>(pct * static_cast<float>(nBuckets-1));
+    const int32 j2 = static_cast<int32>(pct * static_cast<float>(nBuckets-1) + 0.7);
+#else
+    const float pct = ((pt - uqp1) * uqpdir) / uqplsq;
+    int32 j1 = floor(pct * static_cast<float>(nBuckets-1));
+    int32 j2 = ceil(pct * static_cast<float>(nBuckets-1));
+    j1 = std::max(0, j1);
+    j2 = std::min(j2, nBuckets - 1);
+#endif
+
+    assert(j1 >= 0 && j2 <= nBuckets - 1);
+
    const uint32 pixel = GetPixel(i);
    const uint8 *pb = (const uint8 *)(&pixel);

    float minError = FLT_MAX;
    uint8 bestBucket = 0;
-    for(int j = 0; j < nBuckets; j++) {
+    for(int32 j = j1; j <= j2; j++) {

      uint32 interp0 = (*interpVals)[j][0];
      uint32 interp1 = (*interpVals)[j][1];