Refactor RGBAEndpoints

Changed RGBAEndpoints to use the vector/matrix classes in FasTCBase. This caused a ~20ms performance hit on an 8-core machine, which is likely due to the compiler having difficulty optimizing away some of the procedure-call overhead. Profiling shows that the biggest bottleneck is still, by far, the QuantizedError function, so any further optimization should be focused on that.
2025-01-07 01:45:28 +00:00 · 2014-03-21 01:21:07 -04:00 · 2014-03-21 01:21:07 -04:00 · 6954d7b154
parent 2b17cf6f97
commit 6954d7b154
3 changed files with 98 additions and 529 deletions
--- a/BPTCEncoder/src/Compressor.cpp
+++ b/BPTCEncoder/src/Compressor.cpp
@ -489,8 +489,8 @@ double CompressionMode::CompressSingleColor(
      bestPbitCombo = pbi;

      for(uint32 ci = 0; ci < kNumColorChannels; ci++) {
-        p1.c[ci] = static_cast<float>(bestValI[ci]);
-        p2.c[ci] = static_cast<float>(bestValJ[ci]);
+        p1[ci] = static_cast<float>(bestValI[ci]);
+        p2[ci] = static_cast<float>(bestValJ[ci]);
      }
    }
  }
@ -511,27 +511,27 @@ static void ChangePointForDirWithoutPbitChange(
  RGBAVector &v, uint32 dir, const float step[kNumColorChannels]
 ) {
  if(dir % 2) {
-    v.x -= step[0];
+    v.X() -= step[0];
  } else {
-    v.x += step[0];
+    v.X() += step[0];
  }

  if(((dir / 2) % 2)) {
-    v.y -= step[1];
+    v.Y() -= step[1];
  } else  {
-    v.y += step[1];
+    v.Y() += step[1];
  }

  if(((dir / 4) % 2)) {
-    v.z -= step[2];
+    v.Z() -= step[2];
  } else {
-    v.z += step[2];
+    v.Z() += step[2];
  }

  if(((dir / 8) % 2)) {
-    v.a -= step[3];
+    v.W() -= step[3];
  } else {
-    v.a += step[3];
+    v.W() += step[3];
  }
 }

@ -539,27 +539,27 @@ static void ChangePointForDirWithPbitChange(
  RGBAVector &v, uint32 dir, uint32 oldPbit, const float step[kNumColorChannels]
 ) {
  if(dir % 2 && oldPbit == 0) {
-    v.x -= step[0];
+    v.X() -= step[0];
  } else if(!(dir % 2) && oldPbit == 1) {
-    v.x += step[0];
+    v.X() += step[0];
  }

  if(((dir / 2) % 2) && oldPbit == 0) {
-    v.y -= step[1];
+    v.Y() -= step[1];
  } else if(!((dir / 2) % 2) && oldPbit == 1) {
-    v.y += step[1];
+    v.Y() += step[1];
  }

  if(((dir / 4) % 2) && oldPbit == 0) {
-    v.z -= step[2];
+    v.Z() -= step[2];
  } else if(!((dir / 4) % 2) && oldPbit == 1) {
-    v.z += step[2];
+    v.Z() += step[2];
  }

  if(((dir / 8) % 2) && oldPbit == 0) {
-    v.a -= step[3];
+    v.W() -= step[3];
  } else if(!((dir / 8) % 2) && oldPbit == 1) {
-    v.a += step[3];
+    v.W() += step[3];
  }
 }

@ -628,7 +628,7 @@ void CompressionMode::PickBestNeighboringEndpoints(
      }

      for(uint32 i = 0; i < kNumColorChannels; i++) {
-        np.c[i] = std::min(std::max(np.c[i], 0.0f), 255.0f);
+        np[i] = std::min(std::max(np[i], 0.0f), 255.0f);
      }
    }

@ -821,20 +821,20 @@ double CompressionMode::CompressCluster(
      break;

      case 1:
-        swap(v.r, v.a);
+        swap(v.R(), v.A());
        break;

      case 2:
-        swap(v.g, v.a);
+        swap(v.G(), v.A());
        break;

      case 3:
-        swap(v.b, v.a);
+        swap(v.B(), v.A());
        break;
    }

-    alphaVals[i] = v.a;
-    v.a = 255.0f;
+    alphaVals[i] = v.A();
+    v.A() = 255.0f;

    alphaMin = std::min(alphaVals[i], alphaMin);
    alphaMax = std::max(alphaVals[i], alphaMax);
@ -857,7 +857,7 @@ double CompressionMode::CompressCluster(
  const tInterpLevel *interpVals =
    kInterpolationValues + (GetNumberOfBitsPerAlpha() - 1);

-  const float weight = GetErrorMetric().a;
+  const float weight = GetErrorMetric().A();

  const uint32 nBuckets = (1 << GetNumberOfBitsPerAlpha());

@ -1059,8 +1059,8 @@ double CompressionMode::CompressCluster(
  }

  for(uint32 i = 0; i < kNumColorChannels; i++) {
-    p1.c[i] = (i == (kNumColorChannels-1))? a1 : rgbp1.c[i];
-    p2.c[i] = (i == (kNumColorChannels-1))? a2 : rgbp2.c[i];
+    p1[i] = (i == (kNumColorChannels-1))? a1 : rgbp1[i];
+    p2[i] = (i == (kNumColorChannels-1))? a2 : rgbp2[i];
  }

  return rgbError + alphaError;
@ -1088,13 +1088,9 @@ double CompressionMode::CompressCluster(
  const uint32 nBuckets = (1 << GetNumberOfBitsPerIndex());

 #if 1
-  RGBAVector avg =
-    cluster.GetTotal() / static_cast<float>(cluster.GetNumPoints());
+  RGBAVector avg = cluster.GetAvg();
  RGBADir axis;
-  double eigOne;
-  ::GetPrincipalAxis(
-    cluster.GetNumPoints(), cluster.GetPoints(), axis, eigOne, NULL
-  );
+  ::GetPrincipalAxis(cluster.GetNumPoints(), cluster.GetPoints(), axis, NULL, NULL);

  float mindp = FLT_MAX, maxdp = -FLT_MAX;
  for(uint32 i = 0 ; i < cluster.GetNumPoints(); i++) {
@ -1457,7 +1453,6 @@ double CompressionMode::Compress(
 ) {

  const int kModeNumber = GetModeNumber();
-  const int nPartitionBits = GetNumberOfPartitionBits();
  const int nSubsets = GetNumberOfSubsets();

  Params params(shapeIdx);
@ -2012,10 +2007,10 @@ static void CompressBC7Block(const uint32 *block, uint8 *outBuf) {
  for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
    RGBAVector p = RGBAVector(i, block[i]);
    blockCluster.AddPoint(p);
-    if(fabs(p.a - 255.0f) > 1e-10)
+    if(fabs(p.A() - 255.0f) > 1e-10)
      opaque = false;

-    if(p.a > 0.0f)
+    if(p.A() > 0.0f)
      transparent = false;
  }

@ -2329,11 +2324,11 @@ static void CompressBC7Block(
  for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
    RGBAVector p = RGBAVector(i, block[i]);
    blockCluster.AddPoint(p);
-    if(fabs(p.a - 255.0f) > 1e-10) {
+    if(fabs(p.A() - 255.0f) > 1e-10) {
      opaque = false;
    }

-    if(p.a > 0.0f) {
+    if(p.A() > 0.0f) {
      transparent = false;
    }
  }
--- a/BPTCEncoder/src/RGBAEndpoints.cpp
+++ b/BPTCEncoder/src/RGBAEndpoints.cpp
@ -237,104 +237,16 @@ uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit) {

 uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {

-  const uint8 pRet0 = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit);
-  const uint8 pRet1 = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit);
-  const uint8 pRet2 = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit);
-  const uint8 pRet3 = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit);
+  const uint8 pRet0 = QuantizeChannel(uint32(R() + 0.5) & 0xFF, channelMask & 0xFF, pBit);
+  const uint8 pRet1 = QuantizeChannel(uint32(G() + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit);
+  const uint8 pRet2 = QuantizeChannel(uint32(B() + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit);
+  const uint8 pRet3 = QuantizeChannel(uint32(A() + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit);

  const uint32 ret = pRet0 | (pRet1 << 8) | (pRet2 << 16) | (pRet3 << 24);

  return ret;
 }

-///////////////////////////////////////////////////////////////////////////////
-//
-// RGBAMatrix implementation
-//
-///////////////////////////////////////////////////////////////////////////////
-
-RGBAMatrix &RGBAMatrix::operator *=(const RGBAMatrix &mat) {
-  *this = ((*this) * mat);
-  return (*this);
-}
-
-RGBAMatrix RGBAMatrix::operator *(const RGBAMatrix &mat) const {
-
-  RGBAMatrix result;
-
-  for(int i = 0; i < 4; i++) {
-    for(int j = 0; j < 4; j++) {
-
-      result(i, j) = 0.0f;
-      for(int k = 0; k < 4; k++) {
-        result(i, j) += m[i*4 + k] * mat.m[k*4 + j];
-      }
-    }
-  }
-
-  return result;
-}
-
-RGBAVector RGBAMatrix::operator *(const RGBAVector &p) const {
-  return RGBAVector (
-    p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4,
-    p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8,
-    p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12,
-    p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16
-  );
-}
-
-RGBAMatrix RGBAMatrix::RotateX(float rad) {
-  RGBAMatrix result;
-  result.m6 = result.m11 = cos(rad);
-  result.m10 = sin(rad);
-  result.m7 = -result.m10;
-  return result;
-}
-
-RGBAMatrix RGBAMatrix::RotateY(float rad) {
-  RGBAMatrix result;
-  result.m1 = result.m11 = cos(rad);
-  result.m3 = sin(rad);
-  result.m9 = -result.m3;
-  return result;
-}
-
-RGBAMatrix RGBAMatrix::RotateZ(float rad) {
-  RGBAMatrix result;
-  result.m1 = result.m6 = cos(rad);
-  result.m5 = sin(rad);
-  result.m2 = -result.m5;
-  return result;
-}
-
-RGBAMatrix RGBAMatrix::Translate(const RGBAVector &t) {
-  RGBAMatrix result;
-  result.m4 = t.x;
-  result.m8 = t.y;
-  result.m12 = t.z;
-  result.m16 = t.w;
-  return result;
-}
-
-bool RGBAMatrix::Identity() {
-  for(int i = 0; i < 4; i++) {
-    for(int j = 0; j < 4; j++) {
-
-      if(i == j) {
-        if(fabs(m[i*4 + j] - 1.0f) > 1e-5)
-          return false;
-      }
-      else {
-        if(fabs(m[i*4 + j]) > 1e-5)
-          return false;
-      }
-    }
-  }
-
-  return true;
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 //
 // Cluster implementation
@ -347,8 +259,6 @@ RGBACluster::RGBACluster(const RGBACluster &left, const RGBACluster &right) {
    const RGBAVector &p = right.m_DataPoints[i];
    AddPoint(p);
  }
-
-  m_PrincipalAxisCached = false;
 }  

 void RGBACluster::AddPoint(const RGBAVector &p) {
@ -358,64 +268,11 @@ void RGBACluster::AddPoint(const RGBAVector &p) {
  m_PointBitString |= 1 << p.GetIdx();

  for(uint32 i = 0; i < kNumColorChannels; i++) {
-    m_Min.c[i] = min(p.c[i], m_Min.c[i]);
-    m_Max.c[i] = max(p.c[i], m_Max.c[i]);
+    m_Min[i] = min(p[i], m_Min[i]);
+    m_Max[i] = max(p[i], m_Max[i]);
  }
 }

-void RGBACluster::GetPrincipalAxis(RGBADir &axis) {
-
-  if(m_PrincipalAxisCached) {
-    axis = m_PrincipalAxis;
-    return;
-  }
-
-  m_PowerMethodIterations = ::GetPrincipalAxis(
-    m_NumPoints, 
-    m_DataPoints, 
-    m_PrincipalAxis, 
-    m_PrincipalEigenvalue, 
-    &m_SecondEigenvalue
-  );
-
-  m_PrincipalAxisCached = true;
-
-  GetPrincipalAxis(axis);
-}
-
-double RGBACluster::GetPrincipalEigenvalue() {
-
-  if(!m_PrincipalAxisCached) {
-    RGBADir dummy;
-    GetPrincipalAxis(dummy);
-  }
-  
-  assert(m_PrincipalAxisCached);
-  return m_PrincipalEigenvalue;
-}
-
-double RGBACluster::GetSecondEigenvalue() {
-
-  if(!m_PrincipalAxisCached) {
-    RGBADir dummy;
-    GetPrincipalAxis(dummy);
-  }
-  
-  assert(m_PrincipalAxisCached);
-  return m_SecondEigenvalue;
-}
-
-uint32 RGBACluster::GetPowerMethodIterations() {
-
-  if(!m_PrincipalAxisCached) {
-    RGBADir dummy;
-    GetPrincipalAxis(dummy);
-  }
-  
-  assert(m_PrincipalAxisCached);
-  return m_PowerMethodIterations;
-}
-
 double RGBACluster::QuantizedError(
  const RGBAVector &p1, const RGBAVector &p2,
  uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec,
@ -467,7 +324,7 @@ double RGBACluster::QuantizedError(
      for(uint32 k = 0; k < kNumColorChannels; k++) {
        const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF;
        const uint8 dist = sad(pb[k], ip);
-        errorVec.c[k] = kFloatConversion[dist] * metric.c[k];
+        errorVec[k] = kFloatConversion[dist] * metric[k];
      }
      
      float error = errorVec * errorVec;
@ -501,68 +358,13 @@ double RGBACluster::QuantizedError(
 ///////////////////////////////////////////////////////////////////////////////

 void ClampEndpoints(RGBAVector &p1, RGBAVector &p2) {
-  clamp(p1.r, 0.0f, 255.0f);
-  clamp(p1.g, 0.0f, 255.0f);
-  clamp(p1.b, 0.0f, 255.0f);
-  clamp(p1.a, 0.0f, 255.0f);
-
-  clamp(p2.r, 0.0f, 255.0f);
-  clamp(p2.g, 0.0f, 255.0f);
-  clamp(p2.b, 0.0f, 255.0f);
-  clamp(p2.a, 0.0f, 255.0f);
+  for(uint32 i = 0; i < 4; i++) {
+    clamp(p1[i], 0.0f, 255.0f);
+    clamp(p2[i], 0.0f, 255.0f);
+  }
 }

-static uint32 PowerIteration(const RGBAMatrix &mat, RGBADir &eigVec, double &eigVal) {
-
-  int numIterations = 0;
-  const int kMaxNumIterations = 200;
-
-  for(int nTries = 0; nTries < 3; nTries++) {
-  // !SPEED! Find eigenvectors by using the power method. This is good because the
-  // matrix is only 4x4, which allows us to use SIMD...
-  RGBAVector b = RGBAVector(float(rand()) + 1.0f);
-  b /= b.Length();
-
-  bool fixed = false;
-  numIterations = 0;
-  while(!fixed && ++numIterations < kMaxNumIterations) {
-
-    RGBAVector newB = mat * b;
-
-    // !HACK! If the principal eigenvector of the covariance matrix
-    // converges to zero, that means that the points lie equally 
-    // spaced on a sphere in this space. In this (extremely rare)
-    // situation, just choose a point and use it as the principal 
-    // direction.
-    const float newBlen = newB.Length();
-    if(newBlen < 1e-10) {
-      eigVec = b;
-      eigVal = 0.0;
-      return numIterations;
-    }
-
-    eigVal = newB.Length();
-    newB /= float(eigVal);
-
-    if(fabs(1.0f - (b * newB)) < 1e-5)
-      fixed = true;
-
-    b = newB;
-  }
-
-  eigVec = b;  
-  if(numIterations < kMaxNumIterations) {
-    break;
-  }
-  }
-
-  if(numIterations == kMaxNumIterations) {
-    eigVal = 0.0;
-  }
-  return numIterations;
-}
-
-uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, double &eigOne, double *eigTwo) {
+uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, float *eigOne, float *eigTwo) {

  assert(nPts <= kMaxNumDataPoints);

@ -579,7 +381,7 @@ uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, doubl
    toPts[i] = pts[i] - avg;

    for(uint32 j = 0; j < kNumColorChannels; j++) {
-      toPtsMax.c[j] = max(toPtsMax.c[j], toPts[i].c[j]);
+      toPtsMax[j] = max(toPtsMax[j], toPts[i][j]);
    }
  }

@ -602,7 +404,7 @@ uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, doubl
  assert(uptsIdx > 0);

  if(uptsIdx == 1) {
-    axis.r = axis.g = axis.b = axis.a = 0.0f;
+    axis.R() = axis.G() = axis.B() = axis.A() = 0.0f;
    return 0;

  // Collinear?
@ -631,7 +433,7 @@ uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, doubl

      float sum = 0.0;
      for(uint32 k = 0; k < nPts; k++) {
-        sum += toPts[k].c[i] * toPts[k].c[j];
+        sum += toPts[k][i] * toPts[k][j];
      }

      covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1];
@ -639,17 +441,17 @@ uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, doubl
    }
  }
  
-  uint32 iters = PowerIteration(covMatrix, axis, eigOne);
+  uint32 iters = covMatrix.PowerMethod(axis, eigOne);
+  if(NULL != eigTwo && NULL != eigOne) {
+    if(*eigOne != 0.0) {
+      RGBAMatrix reduced;
+      for(uint32 j = 0; j < 4; j++) {
+        for(uint32 i = 0; i < 4; i++) {
+          reduced(i, j) = axis[j] * axis[i];
+        }
+      }

-  if(NULL != eigTwo) {
-    if(eigOne != 0.0) {
-      RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix(
-        axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3], 
-        axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3],
-        axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3],
-        axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3]
-      );
-      
+      reduced = covMatrix - ((*eigOne) * reduced);
      bool allZero = true;
      for(uint32 i = 0; i < 16; i++) {
        if(fabs(reduced[i]) > 0.0005) {
@ -662,7 +464,7 @@ uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, doubl
      }
      else {
        RGBADir dummyDir;
-        iters += PowerIteration(reduced, dummyDir, *eigTwo);
+        iters += reduced.PowerMethod(dummyDir, eigTwo);
      }
    }
    else {
--- a/BPTCEncoder/src/RGBAEndpoints.h
+++ b/BPTCEncoder/src/RGBAEndpoints.h
@ -67,6 +67,8 @@
 #define __RGBA_ENDPOINTS_H__

 #include "TexCompTypes.h"
+#include "Vector4.h"
+#include "Matrix4x4.h"

 #include <cmath>
 #include <cfloat>
@ -75,262 +77,48 @@
 static const uint32 kNumColorChannels = 4;
 static const uint32 kMaxNumDataPoints = 16;

-class RGBAVector {
-
-public:
-  union {
-    struct { float r, g, b, a; };
-    struct { float x, y, z, w; };
-    float c[4];
-  };
-
-  uint32 GetIdx() const { return  idx; }
-
-  RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
-  RGBAVector(uint32 _idx, uint32 pixel) : 
-    r(float(pixel & 0xFF)), 
-    g(float((pixel >> 8) & 0xFF)), 
-    b(float((pixel >> 16) & 0xFF)), 
-    a(float((pixel >> 24) & 0xFF)),
-    idx(_idx)
+class RGBAVector : public FasTC::Vector4<float> {
+  typedef FasTC::Vector4<float> BaseVector;
+ public:
+  uint32 GetIdx() const { return  m_Idx; }
+  RGBAVector() : BaseVector(-1.0, -1.0, -1.0, -1.0) { }
+  RGBAVector(uint32 idx, uint32 pixel) : 
+   BaseVector(
+    static_cast<float>(pixel & 0xFF),
+    static_cast<float>((pixel >> 8) & 0xFF),
+    static_cast<float>((pixel >> 16) & 0xFF),
+    static_cast<float>((pixel >> 24) & 0xFF)
+   )
+   , m_Idx(idx)
  { }

-  RGBAVector(float _r, float _g, float _b, float _a) :
-    r(_r), g(_g), b(_b), a(_a), idx(0) { }
+  RGBAVector(float _r, float _g, float _b, float _a)
+  : BaseVector(_r, _g, _b, _a), m_Idx(0) { }

-  explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { }
+  explicit RGBAVector(float cc) : BaseVector(cc, cc, cc, cc), m_Idx(0) { }

-  RGBAVector &operator =(const RGBAVector &other) {
-    this->idx = other.idx;
-    memcpy(c, other.c, sizeof(c));
-    return (*this);
-  }
-
-  RGBAVector operator +(const RGBAVector &p) const {
-    return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a);
-  }
-
-  RGBAVector &operator +=(const RGBAVector &p) {
-    r += p.r; g += p.g; b += p.b; a += p.a;
-    return *this;
-  }
-
-  RGBAVector operator -(const RGBAVector &p) const {
-    return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a);
-  }
-
-  RGBAVector &operator -=(const RGBAVector &p) {
-    r -= p.r; g -= p.g; b -= p.b; a -= p.a;
-    return *this;
-  }
-
-  RGBAVector operator /(const float s) const {
-    return RGBAVector(r / s, g / s, b / s, a / s);
-  }
-
-  RGBAVector &operator /=(const float s) {
-    r /= s; g /= s; b /= s; a /= s;
-    return *this;
-  }
-
-  float operator *(const RGBAVector &p) const {
-    return r * p.r + g * p.g + b * p.b + a * p.a;
-  }
-
-  float Length() const {
-    return sqrt((*this) * (*this));
-  }
-
-  RGBAVector &operator *=(const RGBAVector &v) {
-    r *= v.r; g *= v.g; b *= v.b; a *= v.a;
-    return *this;
-  }
-
-  RGBAVector operator *(const float s) const {
-    return RGBAVector(r * s, g * s, b * s, a * s);
-  }
-
-  friend RGBAVector operator *(const float s, const RGBAVector &p) {
-    return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s);
-  }
-
-  RGBAVector &operator *=(const float s) {
-    r *= s; g *= s; b *= s; a *= s;
-    return *this;
-  }
-
-  float &operator [](const int i) {
-    return c[i];
-  }
-
-  friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) {
-    const RGBAVector d = rhs - lhs;
-    return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7;
-  }
-
-  friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) {
-    return !(rhs == lhs);
-  }
-
-  operator float *() {
-    return c;
-  }
-
-  RGBAVector Cross(const RGBAVector &rhs) {
-    return RGBAVector(
-      rhs.y * z - y * rhs.z,
-      rhs.z * x - z * rhs.x,
-      rhs.x * y - x * rhs.y,
-      1.0f
-    );
-  }
+  const float &R() const { return vec[0]; }
+  float &R() { return vec[0]; }
+  const float &G() const { return vec[1]; }
+  float &G() { return vec[1]; }
+  const float &B() const { return vec[2]; }
+  float &B() { return vec[2]; }
+  const float &A() const { return vec[3]; }
+  float &A() { return vec[3]; }

  // Quantize this point.
  uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const;

 private:
-  uint32 idx;
-};
-
-class RGBAMatrix {
-private:
-  union {
-    float m[kNumColorChannels*kNumColorChannels];
-    struct {
-      float m1, m2, m3, m4;
-      float m5, m6, m7, m8;
-      float m9, m10, m11, m12;
-      float m13, m14, m15, m16;
-    };
-  };
-
-  RGBAMatrix(const float *arr) {
-    memcpy(m, arr, sizeof(m));
-  }
-
-public:
-
-  RGBAMatrix(
-    float _m1, float _m2, float _m3, float _m4,
-    float _m5, float _m6, float _m7, float _m8,
-    float _m9, float _m10, float _m11, float _m12,
-    float _m13, float _m14, float _m15, float _m16
-  ) :
-    m1(_m1), m2(_m2), m3(_m3), m4(_m4),
-    m5(_m5), m6(_m6), m7(_m7), m8(_m8),
-    m9(_m9), m10(_m10), m11(_m11), m12(_m12),
-    m13(_m13), m14(_m14), m15(_m15), m16(_m16)
-  { }
-  
-  RGBAMatrix() : 
-    m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
-    m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
-    m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
-    m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
-  { }
-
-  RGBAMatrix &operator =(const RGBAMatrix &other) {
-    memcpy(m, other.m, sizeof(m));
-    return (*this);
-  }
-
-  RGBAMatrix operator +(const RGBAMatrix &p) const {
-    float newm[kNumColorChannels*kNumColorChannels];
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i];
-    return RGBAMatrix(newm);
-  }
-
-  RGBAMatrix &operator +=(const RGBAMatrix &p) {
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i];
-    return *this;
-  }
-
-  RGBAMatrix operator -(const RGBAMatrix &p) const {
-    float newm[kNumColorChannels*kNumColorChannels];
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i];
-    return RGBAMatrix(newm);
-  }
-
-  RGBAMatrix &operator -=(const RGBAMatrix &p) {
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i];
-    return *this;
-  }
-
-  RGBAMatrix operator /(const float s) const {
-    float newm[kNumColorChannels*kNumColorChannels];
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s;
-    return RGBAMatrix(newm);
-  }
-
-  RGBAMatrix &operator /=(const float s) {
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s;
-    return *this;
-  }
-
-  RGBAMatrix operator *(const float s) const {
-    float newm[kNumColorChannels*kNumColorChannels];
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s;
-    return RGBAMatrix(newm);
-  }
-
-  RGBAMatrix operator *(const double s) const {
-    float newm[kNumColorChannels*kNumColorChannels];
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s);
-    return RGBAMatrix(newm);
-  }
-
-  friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) {
-    float newm[kNumColorChannels*kNumColorChannels];
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s;
-    return RGBAMatrix(newm);  
-  }
-
-  friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) {
-    float newm[kNumColorChannels*kNumColorChannels];
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s);
-    return RGBAMatrix(newm);  
-  }
-
-  RGBAMatrix &operator *=(const float s) {
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s;
-    return *this;
-  }
-
-  float &operator ()(const int i, const int j) {
-    return (*this)[i*4 + j];
-  }
-
-  float &operator [](const int i) {
-    return m[i];
-  }
-
-  friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) {
-    const RGBAMatrix d = rhs - lhs;
-    for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++)
-      if(d.m[i] > 1e-10)
-        return false;
-    return true;
-  }
-
-  operator float *() {
-    return m;
-  }
-
-  RGBAVector operator *(const RGBAVector &p) const;
-  RGBAMatrix operator *(const RGBAMatrix &mat) const;
-  RGBAMatrix &operator *=(const RGBAMatrix &mat);
-  static RGBAMatrix RotateX(float rad);
-  static RGBAMatrix RotateY(float rad);
-  static RGBAMatrix RotateZ(float rad);
-  static RGBAMatrix Translate(const RGBAVector &t);
-  bool Identity();
+  uint32 m_Idx;
 };
+typedef FasTC::Matrix4x4<float> RGBAMatrix;

 class RGBADir : public RGBAVector {
-public:
+ public:
  RGBADir() : RGBAVector() { }
  RGBADir(const RGBAVector &p) : RGBAVector(p) {
-    *this /= Length();
+    this->Normalize();
  }
 };

@ -344,21 +132,14 @@ public:
    m_NumPoints(0), m_Total(0), 
    m_PointBitString(0),
    m_Min(FLT_MAX),
-    m_Max(-FLT_MAX),
-    m_PrincipalAxisCached(false)
+    m_Max(-FLT_MAX)
  { } 

  RGBACluster(const RGBACluster &c) : 
    m_NumPoints(c.m_NumPoints),
    m_Total(c.m_Total),
    m_PointBitString(c.m_PointBitString), 
-    m_Min(c.m_Min),
-    m_Max(c.m_Max),
-    m_PrincipalAxisCached(c.m_PrincipalAxisCached),
-    m_PrincipalEigenvalue(c.m_PrincipalEigenvalue),
-    m_SecondEigenvalue(c.m_SecondEigenvalue),
-    m_PowerMethodIterations(c.m_PowerMethodIterations),
-    m_PrincipalAxis(c.m_PrincipalAxis)
+    m_Min(c.m_Min), m_Max(c.m_Max)
  { 
    memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector));
  }
@ -368,14 +149,12 @@ public:
    m_NumPoints(1),
    m_Total(p),
    m_PointBitString(0),
-    m_Min(p), m_Max(p),
-    m_PrincipalAxisCached(false)
+    m_Min(p), m_Max(p)
  { 
    m_DataPoints[0] = p;
    m_PointBitString |= (1 << p.GetIdx());
  }
      
-  RGBAVector GetTotal() const { return m_Total; }
  const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; }
  uint32 GetNumPoints() const { return m_NumPoints; }
  RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); }
@ -395,16 +174,10 @@ public:
    const int pbits[2] = NULL, uint8 *indices = NULL) const;

  // Returns the principal axis for this point cluster.
-  double GetPrincipalEigenvalue();
-  double GetSecondEigenvalue();
-  uint32 GetPowerMethodIterations();
-  void GetPrincipalAxis(RGBADir &axis);
-
  bool AllSamePoint() const { return m_Max == m_Min; }
  int GetPointBitString() const { return m_PointBitString; }

 private:
-
  // The number of points in the cluster.
  uint32 m_NumPoints;

@ -415,15 +188,14 @@ private:

  int m_PointBitString;
  RGBAVector m_Min, m_Max;
-
-  bool m_PrincipalAxisCached;
-  double m_PrincipalEigenvalue;
-  double m_SecondEigenvalue;
-  uint32 m_PowerMethodIterations;
-  RGBADir m_PrincipalAxis;
 };

 extern uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit = -1);
-extern uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, double &eigOne, double *eigTwo);
+extern uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, float *eigOne, float *eigTwo);
+
+namespace FasTC {
+  REGISTER_VECTOR_TYPE(RGBAVector);
+  REGISTER_VECTOR_TYPE(RGBADir);
+}

 #endif //__RGBA_ENDPOINTS_H__