Get rid of evil tabs once and forever (from cpp/h files)

This commit is contained in:
Pavel Krajcevski 2013-08-26 16:54:08 -04:00
parent af2318027b
commit 03a7934644
24 changed files with 3303 additions and 3259 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -89,22 +89,22 @@ static T max(const T &a, const T &b) {
static const double kPi = 3.141592653589793238462643383279502884197; static const double kPi = 3.141592653589793238462643383279502884197;
static const float kFloatConversion[256] = { static const float kFloatConversion[256] = {
0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f,
48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f,
64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f,
80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f,
96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, 96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f,
112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f, 112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f,
128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f, 128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f,
144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f, 144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f,
160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f, 160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f,
176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f, 176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f,
192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f, 192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f,
208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f, 208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f,
224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f, 224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f,
240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f 240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f
}; };
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -115,41 +115,41 @@ static const float kFloatConversion[256] = {
static inline uint32 CountBitsInMask(uint8 n) { static inline uint32 CountBitsInMask(uint8 n) {
#if defined(_WIN64) || defined(__x86_64__) || defined(NO_INLINE_ASSEMBLY) #if defined(_WIN64) || defined(__x86_64__) || defined(NO_INLINE_ASSEMBLY)
if(!n) return 0; // no bits set if(!n) return 0; // no bits set
if(!(n & (n-1))) return 1; // power of two if(!(n & (n-1))) return 1; // power of two
uint32 c; uint32 c;
for(c = 0; n; c++) { for(c = 0; n; c++) {
n &= n - 1; n &= n - 1;
} }
return c; return c;
#else #else
#ifdef _MSC_VER #ifdef _MSC_VER
__asm { __asm {
mov eax, 8 mov eax, 8
movzx ecx, n movzx ecx, n
bsf ecx, ecx bsf ecx, ecx
sub eax, ecx sub eax, ecx
} }
#else #else
uint32 ans; uint32 ans;
__asm__("movl $8, %%eax;" __asm__("movl $8, %%eax;"
"movzbl %b1, %%ecx;" "movzbl %b1, %%ecx;"
"bsf %%ecx, %%ecx;" "bsf %%ecx, %%ecx;"
"subl %%ecx, %%eax;" "subl %%ecx, %%eax;"
"movl %%eax, %0;" "movl %%eax, %0;"
: "=Q"(ans) : "=Q"(ans)
: "b"(n) : "b"(n)
: "%eax", "%ecx" : "%eax", "%ecx"
); );
return ans; return ans;
#endif #endif
#endif #endif
} }
template <typename ty> template <typename ty>
static inline void clamp(ty &x, const ty &min, const ty &max) { static inline void clamp(ty &x, const ty &min, const ty &max) {
x = (x < min)? min : ((x > max)? max : x); x = (x < min)? min : ((x > max)? max : x);
} }
// absolute distance. It turns out the compiler does a much // absolute distance. It turns out the compiler does a much
@ -157,23 +157,23 @@ static inline void clamp(ty &x, const ty &min, const ty &max) {
// translate the values to/from registers // translate the values to/from registers
static uint8 sad(uint8 a, uint8 b) { static uint8 sad(uint8 a, uint8 b) {
#if 0 #if 0
__asm __asm
{ {
movzx eax, a movzx eax, a
movzx ecx, b movzx ecx, b
sub eax, ecx sub eax, ecx
jns done jns done
neg eax neg eax
done: done:
} }
#else #else
//const INT d = a - b; //const INT d = a - b;
//const INT mask = d >> 31; //const INT mask = d >> 31;
//return (d ^ mask) - mask; //return (d ^ mask) - mask;
// return abs(a - b); // return abs(a - b);
return (a > b)? a - b : b - a; return (a > b)? a - b : b - a;
#endif #endif
} }
@ -186,55 +186,55 @@ done:
uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit) { uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit) {
// If the mask is all the bits, then we can just return the value. // If the mask is all the bits, then we can just return the value.
if(mask == 0xFF) { if(mask == 0xFF) {
return val; return val;
} }
// Otherwise if the mask is no bits then we'll assume that they want // Otherwise if the mask is no bits then we'll assume that they want
// all the bits ... this is only really relevant for alpha... // all the bits ... this is only really relevant for alpha...
if(mask == 0x0) { if(mask == 0x0) {
return 0xFF; return 0xFF;
} }
uint32 prec = CountBitsInMask(mask); uint32 prec = CountBitsInMask(mask);
const uint32 step = 1 << (8 - prec); const uint32 step = 1 << (8 - prec);
assert(step-1 == uint8(~mask)); assert(step-1 == uint8(~mask));
uint32 lval = val & mask; uint32 lval = val & mask;
uint32 hval = lval + step; uint32 hval = lval + step;
if(pBit >= 0) { if(pBit >= 0) {
prec++; prec++;
lval |= !!(pBit) << (8 - prec); lval |= !!(pBit) << (8 - prec);
hval |= !!(pBit) << (8 - prec); hval |= !!(pBit) << (8 - prec);
} }
if(lval > val) { if(lval > val) {
lval -= step; lval -= step;
hval -= step; hval -= step;
} }
lval |= lval >> prec; lval |= lval >> prec;
hval |= hval >> prec; hval |= hval >> prec;
if(sad(val, lval) < sad(val, hval)) if(sad(val, lval) < sad(val, hval))
return lval; return lval;
else else
return hval; return hval;
} }
uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const { uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
const uint8 pRet0 = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit); const uint8 pRet0 = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit);
const uint8 pRet1 = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit); const uint8 pRet1 = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit);
const uint8 pRet2 = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit); const uint8 pRet2 = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit);
const uint8 pRet3 = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit); const uint8 pRet3 = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit);
const uint32 ret = pRet0 | (pRet1 << 8) | (pRet2 << 16) | (pRet3 << 24); const uint32 ret = pRet0 | (pRet1 << 8) | (pRet2 << 16) | (pRet3 << 24);
return ret; return ret;
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -244,85 +244,85 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
RGBAMatrix &RGBAMatrix::operator *=(const RGBAMatrix &mat) { RGBAMatrix &RGBAMatrix::operator *=(const RGBAMatrix &mat) {
*this = ((*this) * mat); *this = ((*this) * mat);
return (*this); return (*this);
} }
RGBAMatrix RGBAMatrix::operator *(const RGBAMatrix &mat) const { RGBAMatrix RGBAMatrix::operator *(const RGBAMatrix &mat) const {
RGBAMatrix result; RGBAMatrix result;
for(int i = 0; i < 4; i++) { for(int i = 0; i < 4; i++) {
for(int j = 0; j < 4; j++) { for(int j = 0; j < 4; j++) {
result(i, j) = 0.0f; result(i, j) = 0.0f;
for(int k = 0; k < 4; k++) { for(int k = 0; k < 4; k++) {
result(i, j) += m[i*4 + k] * mat.m[k*4 + j]; result(i, j) += m[i*4 + k] * mat.m[k*4 + j];
} }
} }
} }
return result; return result;
} }
RGBAVector RGBAMatrix::operator *(const RGBAVector &p) const { RGBAVector RGBAMatrix::operator *(const RGBAVector &p) const {
return RGBAVector ( return RGBAVector (
p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4, p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4,
p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8, p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8,
p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12, p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12,
p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16 p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16
); );
} }
RGBAMatrix RGBAMatrix::RotateX(float rad) { RGBAMatrix RGBAMatrix::RotateX(float rad) {
RGBAMatrix result; RGBAMatrix result;
result.m6 = result.m11 = cos(rad); result.m6 = result.m11 = cos(rad);
result.m10 = sin(rad); result.m10 = sin(rad);
result.m7 = -result.m10; result.m7 = -result.m10;
return result; return result;
} }
RGBAMatrix RGBAMatrix::RotateY(float rad) { RGBAMatrix RGBAMatrix::RotateY(float rad) {
RGBAMatrix result; RGBAMatrix result;
result.m1 = result.m11 = cos(rad); result.m1 = result.m11 = cos(rad);
result.m3 = sin(rad); result.m3 = sin(rad);
result.m9 = -result.m3; result.m9 = -result.m3;
return result; return result;
} }
RGBAMatrix RGBAMatrix::RotateZ(float rad) { RGBAMatrix RGBAMatrix::RotateZ(float rad) {
RGBAMatrix result; RGBAMatrix result;
result.m1 = result.m6 = cos(rad); result.m1 = result.m6 = cos(rad);
result.m5 = sin(rad); result.m5 = sin(rad);
result.m2 = -result.m5; result.m2 = -result.m5;
return result; return result;
} }
RGBAMatrix RGBAMatrix::Translate(const RGBAVector &t) { RGBAMatrix RGBAMatrix::Translate(const RGBAVector &t) {
RGBAMatrix result; RGBAMatrix result;
result.m4 = t.x; result.m4 = t.x;
result.m8 = t.y; result.m8 = t.y;
result.m12 = t.z; result.m12 = t.z;
result.m16 = t.w; result.m16 = t.w;
return result; return result;
} }
bool RGBAMatrix::Identity() { bool RGBAMatrix::Identity() {
for(int i = 0; i < 4; i++) { for(int i = 0; i < 4; i++) {
for(int j = 0; j < 4; j++) { for(int j = 0; j < 4; j++) {
if(i == j) { if(i == j) {
if(fabs(m[i*4 + j] - 1.0f) > 1e-5) if(fabs(m[i*4 + j] - 1.0f) > 1e-5)
return false; return false;
} }
else { else {
if(fabs(m[i*4 + j]) > 1e-5) if(fabs(m[i*4 + j]) > 1e-5)
return false; return false;
} }
} }
} }
return true; return true;
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -332,45 +332,45 @@ bool RGBAMatrix::Identity() {
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
RGBACluster::RGBACluster(const RGBACluster &left, const RGBACluster &right) { RGBACluster::RGBACluster(const RGBACluster &left, const RGBACluster &right) {
*this = left; *this = left;
for(uint32 i = 0; i < right.m_NumPoints; i++) { for(uint32 i = 0; i < right.m_NumPoints; i++) {
const RGBAVector &p = right.m_DataPoints[i]; const RGBAVector &p = right.m_DataPoints[i];
AddPoint(p); AddPoint(p);
} }
m_PrincipalAxisCached = false; m_PrincipalAxisCached = false;
} }
void RGBACluster::AddPoint(const RGBAVector &p) { void RGBACluster::AddPoint(const RGBAVector &p) {
assert(m_NumPoints < kMaxNumDataPoints); assert(m_NumPoints < kMaxNumDataPoints);
m_Total += p; m_Total += p;
m_DataPoints[m_NumPoints++] = p; m_DataPoints[m_NumPoints++] = p;
m_PointBitString |= 1 << p.GetIdx(); m_PointBitString |= 1 << p.GetIdx();
for(uint32 i = 0; i < kNumColorChannels; i++) { for(uint32 i = 0; i < kNumColorChannels; i++) {
m_Min.c[i] = min(p.c[i], m_Min.c[i]); m_Min.c[i] = min(p.c[i], m_Min.c[i]);
m_Max.c[i] = max(p.c[i], m_Max.c[i]); m_Max.c[i] = max(p.c[i], m_Max.c[i]);
} }
} }
void RGBACluster::GetPrincipalAxis(RGBADir &axis) { void RGBACluster::GetPrincipalAxis(RGBADir &axis) {
if(m_PrincipalAxisCached) { if(m_PrincipalAxisCached) {
axis = m_PrincipalAxis; axis = m_PrincipalAxis;
return; return;
} }
m_PowerMethodIterations = ::GetPrincipalAxis( m_PowerMethodIterations = ::GetPrincipalAxis(
m_NumPoints, m_NumPoints,
m_DataPoints, m_DataPoints,
m_PrincipalAxis, m_PrincipalAxis,
m_PrincipalEigenvalue, m_PrincipalEigenvalue,
&m_SecondEigenvalue &m_SecondEigenvalue
); );
m_PrincipalAxisCached = true; m_PrincipalAxisCached = true;
GetPrincipalAxis(axis); GetPrincipalAxis(axis);
} }
double RGBACluster::GetPrincipalEigenvalue() { double RGBACluster::GetPrincipalEigenvalue() {
@ -408,74 +408,74 @@ uint32 RGBACluster::GetPowerMethodIterations() {
double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2], int *indices) const { double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2], int *indices) const {
// nBuckets should be a power of two. // nBuckets should be a power of two.
assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1))); assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1)); const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1));
typedef uint32 tInterpPair[2]; typedef uint32 tInterpPair[2];
typedef tInterpPair tInterpLevel[16]; typedef tInterpPair tInterpLevel[16];
const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1); const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1);
assert(indexPrec >= 2 && indexPrec <= 4); assert(indexPrec >= 2 && indexPrec <= 4);
uint32 qp1, qp2; uint32 qp1, qp2;
if(pbits) { if(pbits) {
qp1 = p1.ToPixel(bitMask, pbits[0]); qp1 = p1.ToPixel(bitMask, pbits[0]);
qp2 = p2.ToPixel(bitMask, pbits[1]); qp2 = p2.ToPixel(bitMask, pbits[1]);
} }
else { else {
qp1 = p1.ToPixel(bitMask); qp1 = p1.ToPixel(bitMask);
qp2 = p2.ToPixel(bitMask); qp2 = p2.ToPixel(bitMask);
} }
uint8 *pqp1 = (uint8 *)&qp1; uint8 *pqp1 = (uint8 *)&qp1;
uint8 *pqp2 = (uint8 *)&qp2; uint8 *pqp2 = (uint8 *)&qp2;
const RGBAVector metric = errorMetricVec; const RGBAVector metric = errorMetricVec;
float totalError = 0.0; float totalError = 0.0;
for(uint32 i = 0; i < m_NumPoints; i++) { for(uint32 i = 0; i < m_NumPoints; i++) {
const uint32 pixel = m_DataPoints[i].ToPixel(); const uint32 pixel = m_DataPoints[i].ToPixel();
const uint8 *pb = (const uint8 *)(&pixel); const uint8 *pb = (const uint8 *)(&pixel);
float minError = FLT_MAX; float minError = FLT_MAX;
int bestBucket = -1; int bestBucket = -1;
for(int j = 0; j < nBuckets; j++) { for(int j = 0; j < nBuckets; j++) {
uint32 interp0 = (*interpVals)[j][0]; uint32 interp0 = (*interpVals)[j][0];
uint32 interp1 = (*interpVals)[j][1]; uint32 interp1 = (*interpVals)[j][1];
RGBAVector errorVec (0.0f); RGBAVector errorVec (0.0f);
for(uint32 k = 0; k < kNumColorChannels; k++) { for(uint32 k = 0; k < kNumColorChannels; k++) {
const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF; const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF;
const uint8 dist = sad(pb[k], ip); const uint8 dist = sad(pb[k], ip);
errorVec.c[k] = kFloatConversion[dist] * metric.c[k]; errorVec.c[k] = kFloatConversion[dist] * metric.c[k];
} }
float error = errorVec * errorVec; float error = errorVec * errorVec;
if(error < minError) { if(error < minError) {
minError = error; minError = error;
bestBucket = j; bestBucket = j;
} }
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving // Conceptually, once the error starts growing, it doesn't stop growing (we're moving
// farther away from the reference point along the line). Hence we can early out here. // farther away from the reference point along the line). Hence we can early out here.
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer // However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
// about 0.01 RMS error. // about 0.01 RMS error.
else if(error > minError) { else if(error > minError) {
break; break;
} }
} }
totalError += minError; totalError += minError;
assert(bestBucket >= 0); assert(bestBucket >= 0);
if(indices) indices[i] = bestBucket; if(indices) indices[i] = bestBucket;
} }
return totalError; return totalError;
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -485,175 +485,174 @@ double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, u
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
void ClampEndpoints(RGBAVector &p1, RGBAVector &p2) { void ClampEndpoints(RGBAVector &p1, RGBAVector &p2) {
clamp(p1.r, 0.0f, 255.0f); clamp(p1.r, 0.0f, 255.0f);
clamp(p1.g, 0.0f, 255.0f); clamp(p1.g, 0.0f, 255.0f);
clamp(p1.b, 0.0f, 255.0f); clamp(p1.b, 0.0f, 255.0f);
clamp(p1.a, 0.0f, 255.0f); clamp(p1.a, 0.0f, 255.0f);
clamp(p2.r, 0.0f, 255.0f); clamp(p2.r, 0.0f, 255.0f);
clamp(p2.g, 0.0f, 255.0f); clamp(p2.g, 0.0f, 255.0f);
clamp(p2.b, 0.0f, 255.0f); clamp(p2.b, 0.0f, 255.0f);
clamp(p2.a, 0.0f, 255.0f); clamp(p2.a, 0.0f, 255.0f);
} }
static uint32 PowerIteration(const RGBAMatrix &mat, RGBADir &eigVec, double &eigVal) { static uint32 PowerIteration(const RGBAMatrix &mat, RGBADir &eigVec, double &eigVal) {
int numIterations = 0; int numIterations = 0;
const int kMaxNumIterations = 200; const int kMaxNumIterations = 200;
for(int nTries = 0; nTries < 3; nTries++) { for(int nTries = 0; nTries < 3; nTries++) {
// !SPEED! Find eigenvectors by using the power method. This is good because the // !SPEED! Find eigenvectors by using the power method. This is good because the
// matrix is only 4x4, which allows us to use SIMD... // matrix is only 4x4, which allows us to use SIMD...
RGBAVector b = RGBAVector(float(rand()) + 1.0f); RGBAVector b = RGBAVector(float(rand()) + 1.0f);
b /= b.Length(); b /= b.Length();
bool fixed = false; bool fixed = false;
numIterations = 0; numIterations = 0;
while(!fixed && ++numIterations < kMaxNumIterations) { while(!fixed && ++numIterations < kMaxNumIterations) {
RGBAVector newB = mat * b; RGBAVector newB = mat * b;
// !HACK! If the principal eigenvector of the covariance matrix // !HACK! If the principal eigenvector of the covariance matrix
// converges to zero, that means that the points lie equally // converges to zero, that means that the points lie equally
// spaced on a sphere in this space. In this (extremely rare) // spaced on a sphere in this space. In this (extremely rare)
// situation, just choose a point and use it as the principal // situation, just choose a point and use it as the principal
// direction. // direction.
const float newBlen = newB.Length(); const float newBlen = newB.Length();
if(newBlen < 1e-10) { if(newBlen < 1e-10) {
eigVec = b; eigVec = b;
eigVal = 0.0; eigVal = 0.0;
return numIterations; return numIterations;
} }
eigVal = newB.Length(); eigVal = newB.Length();
newB /= float(eigVal); newB /= float(eigVal);
if(fabs(1.0f - (b * newB)) < 1e-5) if(fabs(1.0f - (b * newB)) < 1e-5)
fixed = true; fixed = true;
b = newB; b = newB;
}
eigVec = b;
if(numIterations < kMaxNumIterations) {
break;
}
} }
if(numIterations == kMaxNumIterations) { eigVec = b;
eigVal = 0.0; if(numIterations < kMaxNumIterations) {
} break;
return numIterations; }
}
if(numIterations == kMaxNumIterations) {
eigVal = 0.0;
}
return numIterations;
} }
uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, double &eigOne, double *eigTwo) { uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, double &eigOne, double *eigTwo) {
assert(nPts <= kMaxNumDataPoints); assert(nPts <= kMaxNumDataPoints);
RGBAVector avg (0.0f); RGBAVector avg (0.0f);
for(uint32 i = 0; i < nPts; i++) { for(uint32 i = 0; i < nPts; i++) {
avg += pts[i]; avg += pts[i];
} }
avg /= float(nPts); avg /= float(nPts);
// We use these vectors for calculating the covariance matrix... // We use these vectors for calculating the covariance matrix...
RGBAVector toPts[kMaxNumDataPoints]; RGBAVector toPts[kMaxNumDataPoints];
RGBAVector toPtsMax(-FLT_MAX); RGBAVector toPtsMax(-FLT_MAX);
for(uint32 i = 0; i < nPts; i++) { for(uint32 i = 0; i < nPts; i++) {
toPts[i] = pts[i] - avg; toPts[i] = pts[i] - avg;
for(uint32 j = 0; j < kNumColorChannels; j++) { for(uint32 j = 0; j < kNumColorChannels; j++) {
toPtsMax.c[j] = max(toPtsMax.c[j], toPts[i].c[j]); toPtsMax.c[j] = max(toPtsMax.c[j], toPts[i].c[j]);
} }
} }
// Generate a list of unique points... // Generate a list of unique points...
RGBAVector upts[kMaxNumDataPoints]; RGBAVector upts[kMaxNumDataPoints];
uint32 uptsIdx = 0; uint32 uptsIdx = 0;
for(uint32 i = 0; i < nPts; i++) { for(uint32 i = 0; i < nPts; i++) {
bool hasPt = false; bool hasPt = false;
for(uint32 j = 0; j < uptsIdx; j++) { for(uint32 j = 0; j < uptsIdx; j++) {
if(upts[j] == pts[i]) if(upts[j] == pts[i])
hasPt = true; hasPt = true;
} }
if(!hasPt) { if(!hasPt) {
upts[uptsIdx++] = pts[i]; upts[uptsIdx++] = pts[i];
} }
} }
assert(uptsIdx > 0); assert(uptsIdx > 0);
if(uptsIdx == 1) { if(uptsIdx == 1) {
axis.r = axis.g = axis.b = axis.a = 0.0f; axis.r = axis.g = axis.b = axis.a = 0.0f;
return 0; return 0;
}
// Collinear?
else {
RGBADir dir (upts[1] - upts[0]); // Collinear?
bool collinear = true; } else {
for(uint32 i = 2; i < nPts; i++) { RGBADir dir (upts[1] - upts[0]);
RGBAVector v = (upts[i] - upts[0]); bool collinear = true;
if(fabs(fabs(v*dir) - v.Length()) > 1e-7) { for(uint32 i = 2; i < nPts; i++) {
collinear = false; RGBAVector v = (upts[i] - upts[0]);
break; if(fabs(fabs(v*dir) - v.Length()) > 1e-7) {
} collinear = false;
} break;
}
}
if(collinear) { if(collinear) {
axis = dir; axis = dir;
return 0; return 0;
} }
} }
RGBAMatrix covMatrix; RGBAMatrix covMatrix;
// Compute covariance. // Compute covariance.
for(uint32 i = 0; i < kNumColorChannels; i++) { for(uint32 i = 0; i < kNumColorChannels; i++) {
for(uint32 j = 0; j <= i; j++) { for(uint32 j = 0; j <= i; j++) {
float sum = 0.0; float sum = 0.0;
for(uint32 k = 0; k < nPts; k++) { for(uint32 k = 0; k < nPts; k++) {
sum += toPts[k].c[i] * toPts[k].c[j]; sum += toPts[k].c[i] * toPts[k].c[j];
} }
covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1]; covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1];
covMatrix(j, i) = covMatrix(i, j); covMatrix(j, i) = covMatrix(i, j);
} }
} }
uint32 iters = PowerIteration(covMatrix, axis, eigOne); uint32 iters = PowerIteration(covMatrix, axis, eigOne);
if(NULL != eigTwo) { if(NULL != eigTwo) {
if(eigOne != 0.0) { if(eigOne != 0.0) {
RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix( RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix(
axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3], axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3],
axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3], axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3],
axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3], axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3],
axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3] axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3]
); );
bool allZero = true; bool allZero = true;
for(uint32 i = 0; i < 16; i++) { for(uint32 i = 0; i < 16; i++) {
if(fabs(reduced[i]) > 0.0005) { if(fabs(reduced[i]) > 0.0005) {
allZero = false; allZero = false;
}
}
if(allZero) {
*eigTwo = 0.0;
}
else {
RGBADir dummyDir;
iters += PowerIteration(reduced, dummyDir, *eigTwo);
}
}
else {
*eigTwo = 0.0;
}
} }
}
return iters; if(allZero) {
*eigTwo = 0.0;
}
else {
RGBADir dummyDir;
iters += PowerIteration(reduced, dummyDir, *eigTwo);
}
}
else {
*eigTwo = 0.0;
}
}
return iters;
} }

View file

@ -78,260 +78,260 @@ static const uint32 kMaxNumDataPoints = 16;
class RGBAVector { class RGBAVector {
public: public:
union { union {
struct { float r, g, b, a; }; struct { float r, g, b, a; };
struct { float x, y, z, w; }; struct { float x, y, z, w; };
float c[4]; float c[4];
}; };
uint32 GetIdx() const { return idx; } uint32 GetIdx() const { return idx; }
RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { } RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
RGBAVector(uint32 _idx, uint32 pixel) : RGBAVector(uint32 _idx, uint32 pixel) :
r(float(pixel & 0xFF)), r(float(pixel & 0xFF)),
g(float((pixel >> 8) & 0xFF)), g(float((pixel >> 8) & 0xFF)),
b(float((pixel >> 16) & 0xFF)), b(float((pixel >> 16) & 0xFF)),
a(float((pixel >> 24) & 0xFF)), a(float((pixel >> 24) & 0xFF)),
idx(_idx) idx(_idx)
{ } { }
RGBAVector(float _r, float _g, float _b, float _a) : RGBAVector(float _r, float _g, float _b, float _a) :
r(_r), g(_g), b(_b), a(_a), idx(0) { } r(_r), g(_g), b(_b), a(_a), idx(0) { }
explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { } explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { }
RGBAVector &operator =(const RGBAVector &other) { RGBAVector &operator =(const RGBAVector &other) {
this->idx = other.idx; this->idx = other.idx;
memcpy(c, other.c, sizeof(c)); memcpy(c, other.c, sizeof(c));
return (*this); return (*this);
} }
RGBAVector operator +(const RGBAVector &p) const { RGBAVector operator +(const RGBAVector &p) const {
return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a); return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a);
} }
RGBAVector &operator +=(const RGBAVector &p) { RGBAVector &operator +=(const RGBAVector &p) {
r += p.r; g += p.g; b += p.b; a += p.a; r += p.r; g += p.g; b += p.b; a += p.a;
return *this; return *this;
} }
RGBAVector operator -(const RGBAVector &p) const { RGBAVector operator -(const RGBAVector &p) const {
return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a); return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a);
} }
RGBAVector &operator -=(const RGBAVector &p) { RGBAVector &operator -=(const RGBAVector &p) {
r -= p.r; g -= p.g; b -= p.b; a -= p.a; r -= p.r; g -= p.g; b -= p.b; a -= p.a;
return *this; return *this;
} }
RGBAVector operator /(const float s) const { RGBAVector operator /(const float s) const {
return RGBAVector(r / s, g / s, b / s, a / s); return RGBAVector(r / s, g / s, b / s, a / s);
} }
RGBAVector &operator /=(const float s) { RGBAVector &operator /=(const float s) {
r /= s; g /= s; b /= s; a /= s; r /= s; g /= s; b /= s; a /= s;
return *this; return *this;
} }
float operator *(const RGBAVector &p) const { float operator *(const RGBAVector &p) const {
return r * p.r + g * p.g + b * p.b + a * p.a; return r * p.r + g * p.g + b * p.b + a * p.a;
} }
float Length() const { float Length() const {
return sqrt((*this) * (*this)); return sqrt((*this) * (*this));
} }
RGBAVector &operator *=(const RGBAVector &v) { RGBAVector &operator *=(const RGBAVector &v) {
r *= v.r; g *= v.g; b *= v.b; a *= v.a; r *= v.r; g *= v.g; b *= v.b; a *= v.a;
return *this; return *this;
} }
RGBAVector operator *(const float s) const { RGBAVector operator *(const float s) const {
return RGBAVector(r * s, g * s, b * s, a * s); return RGBAVector(r * s, g * s, b * s, a * s);
} }
friend RGBAVector operator *(const float s, const RGBAVector &p) { friend RGBAVector operator *(const float s, const RGBAVector &p) {
return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s); return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s);
} }
RGBAVector &operator *=(const float s) { RGBAVector &operator *=(const float s) {
r *= s; g *= s; b *= s; a *= s; r *= s; g *= s; b *= s; a *= s;
return *this; return *this;
} }
float &operator [](const int i) { float &operator [](const int i) {
return c[i]; return c[i];
} }
friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) { friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) {
const RGBAVector d = rhs - lhs; const RGBAVector d = rhs - lhs;
return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7; return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7;
} }
friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) { friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) {
return !(rhs == lhs); return !(rhs == lhs);
} }
operator float *() { operator float *() {
return c; return c;
} }
RGBAVector Cross(const RGBAVector &rhs) { RGBAVector Cross(const RGBAVector &rhs) {
return RGBAVector( return RGBAVector(
rhs.y * z - y * rhs.z, rhs.y * z - y * rhs.z,
rhs.z * x - z * rhs.x, rhs.z * x - z * rhs.x,
rhs.x * y - x * rhs.y, rhs.x * y - x * rhs.y,
1.0f 1.0f
); );
} }
// Quantize this point. // Quantize this point.
uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const; uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const;
private: private:
uint32 idx; uint32 idx;
}; };
class RGBAMatrix { class RGBAMatrix {
private: private:
union { union {
float m[kNumColorChannels*kNumColorChannels]; float m[kNumColorChannels*kNumColorChannels];
struct { struct {
float m1, m2, m3, m4; float m1, m2, m3, m4;
float m5, m6, m7, m8; float m5, m6, m7, m8;
float m9, m10, m11, m12; float m9, m10, m11, m12;
float m13, m14, m15, m16; float m13, m14, m15, m16;
}; };
}; };
RGBAMatrix(const float *arr) { RGBAMatrix(const float *arr) {
memcpy(m, arr, sizeof(m)); memcpy(m, arr, sizeof(m));
} }
public: public:
RGBAMatrix( RGBAMatrix(
float _m1, float _m2, float _m3, float _m4, float _m1, float _m2, float _m3, float _m4,
float _m5, float _m6, float _m7, float _m8, float _m5, float _m6, float _m7, float _m8,
float _m9, float _m10, float _m11, float _m12, float _m9, float _m10, float _m11, float _m12,
float _m13, float _m14, float _m15, float _m16 float _m13, float _m14, float _m15, float _m16
) : ) :
m1(_m1), m2(_m2), m3(_m3), m4(_m4), m1(_m1), m2(_m2), m3(_m3), m4(_m4),
m5(_m5), m6(_m6), m7(_m7), m8(_m8), m5(_m5), m6(_m6), m7(_m7), m8(_m8),
m9(_m9), m10(_m10), m11(_m11), m12(_m12), m9(_m9), m10(_m10), m11(_m11), m12(_m12),
m13(_m13), m14(_m14), m15(_m15), m16(_m16) m13(_m13), m14(_m14), m15(_m15), m16(_m16)
{ } { }
RGBAMatrix() : RGBAMatrix() :
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f), m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f), m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f), m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f) m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
{ } { }
RGBAMatrix &operator =(const RGBAMatrix &other) { RGBAMatrix &operator =(const RGBAMatrix &other) {
memcpy(m, other.m, sizeof(m)); memcpy(m, other.m, sizeof(m));
return (*this); return (*this);
} }
RGBAMatrix operator +(const RGBAMatrix &p) const { RGBAMatrix operator +(const RGBAMatrix &p) const {
float newm[kNumColorChannels*kNumColorChannels]; float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i]; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i];
return RGBAMatrix(newm); return RGBAMatrix(newm);
} }
RGBAMatrix &operator +=(const RGBAMatrix &p) { RGBAMatrix &operator +=(const RGBAMatrix &p) {
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i]; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i];
return *this; return *this;
} }
RGBAMatrix operator -(const RGBAMatrix &p) const { RGBAMatrix operator -(const RGBAMatrix &p) const {
float newm[kNumColorChannels*kNumColorChannels]; float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i]; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i];
return RGBAMatrix(newm); return RGBAMatrix(newm);
} }
RGBAMatrix &operator -=(const RGBAMatrix &p) { RGBAMatrix &operator -=(const RGBAMatrix &p) {
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i]; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i];
return *this; return *this;
} }
RGBAMatrix operator /(const float s) const { RGBAMatrix operator /(const float s) const {
float newm[kNumColorChannels*kNumColorChannels]; float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s;
return RGBAMatrix(newm); return RGBAMatrix(newm);
} }
RGBAMatrix &operator /=(const float s) { RGBAMatrix &operator /=(const float s) {
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s;
return *this; return *this;
} }
RGBAMatrix operator *(const float s) const { RGBAMatrix operator *(const float s) const {
float newm[kNumColorChannels*kNumColorChannels]; float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s;
return RGBAMatrix(newm); return RGBAMatrix(newm);
} }
RGBAMatrix operator *(const double s) const { RGBAMatrix operator *(const double s) const {
float newm[kNumColorChannels*kNumColorChannels]; float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s); for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s);
return RGBAMatrix(newm); return RGBAMatrix(newm);
} }
friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) { friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) {
float newm[kNumColorChannels*kNumColorChannels]; float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s;
return RGBAMatrix(newm); return RGBAMatrix(newm);
} }
friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) { friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) {
float newm[kNumColorChannels*kNumColorChannels]; float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s); for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s);
return RGBAMatrix(newm); return RGBAMatrix(newm);
} }
RGBAMatrix &operator *=(const float s) { RGBAMatrix &operator *=(const float s) {
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s; for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s;
return *this; return *this;
} }
float &operator ()(const int i, const int j) { float &operator ()(const int i, const int j) {
return (*this)[i*4 + j]; return (*this)[i*4 + j];
} }
float &operator [](const int i) { float &operator [](const int i) {
return m[i]; return m[i];
} }
friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) { friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) {
const RGBAMatrix d = rhs - lhs; const RGBAMatrix d = rhs - lhs;
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++)
if(d.m[i] > 1e-10) if(d.m[i] > 1e-10)
return false; return false;
return true; return true;
} }
operator float *() { operator float *() {
return m; return m;
} }
RGBAVector operator *(const RGBAVector &p) const; RGBAVector operator *(const RGBAVector &p) const;
RGBAMatrix operator *(const RGBAMatrix &mat) const; RGBAMatrix operator *(const RGBAMatrix &mat) const;
RGBAMatrix &operator *=(const RGBAMatrix &mat); RGBAMatrix &operator *=(const RGBAMatrix &mat);
static RGBAMatrix RotateX(float rad); static RGBAMatrix RotateX(float rad);
static RGBAMatrix RotateY(float rad); static RGBAMatrix RotateY(float rad);
static RGBAMatrix RotateZ(float rad); static RGBAMatrix RotateZ(float rad);
static RGBAMatrix Translate(const RGBAVector &t); static RGBAMatrix Translate(const RGBAVector &t);
bool Identity(); bool Identity();
}; };
class RGBADir : public RGBAVector { class RGBADir : public RGBAVector {
public: public:
RGBADir() : RGBAVector() { } RGBADir() : RGBAVector() { }
RGBADir(const RGBAVector &p) : RGBAVector(p) { RGBADir(const RGBAVector &p) : RGBAVector(p) {
*this /= Length(); *this /= Length();
} }
}; };
// Makes sure that the values of the endpoints lie between 0 and 1. // Makes sure that the values of the endpoints lie between 0 and 1.
@ -340,83 +340,83 @@ extern void ClampEndpoints(RGBAVector &p1, RGBAVector &p2);
class RGBACluster { class RGBACluster {
public: public:
RGBACluster() : RGBACluster() :
m_NumPoints(0), m_Total(0), m_NumPoints(0), m_Total(0),
m_PointBitString(0), m_PointBitString(0),
m_Min(FLT_MAX), m_Min(FLT_MAX),
m_Max(-FLT_MAX), m_Max(-FLT_MAX),
m_PrincipalAxisCached(false) m_PrincipalAxisCached(false)
{ } { }
RGBACluster(const RGBACluster &c) : RGBACluster(const RGBACluster &c) :
m_NumPoints(c.m_NumPoints), m_NumPoints(c.m_NumPoints),
m_Total(c.m_Total), m_Total(c.m_Total),
m_PointBitString(c.m_PointBitString), m_PointBitString(c.m_PointBitString),
m_Min(c.m_Min), m_Min(c.m_Min),
m_Max(c.m_Max), m_Max(c.m_Max),
m_PrincipalAxisCached(c.m_PrincipalAxisCached), m_PrincipalAxisCached(c.m_PrincipalAxisCached),
m_PrincipalEigenvalue(c.m_PrincipalEigenvalue), m_PrincipalEigenvalue(c.m_PrincipalEigenvalue),
m_SecondEigenvalue(c.m_SecondEigenvalue), m_SecondEigenvalue(c.m_SecondEigenvalue),
m_PowerMethodIterations(c.m_PowerMethodIterations), m_PowerMethodIterations(c.m_PowerMethodIterations),
m_PrincipalAxis(c.m_PrincipalAxis) m_PrincipalAxis(c.m_PrincipalAxis)
{ {
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector)); memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector));
} }
RGBACluster(const RGBACluster &left, const RGBACluster &right); RGBACluster(const RGBACluster &left, const RGBACluster &right);
RGBACluster(const RGBAVector &p) : RGBACluster(const RGBAVector &p) :
m_NumPoints(1), m_NumPoints(1),
m_Total(p), m_Total(p),
m_PointBitString(0), m_PointBitString(0),
m_Min(p), m_Max(p), m_Min(p), m_Max(p),
m_PrincipalAxisCached(false) m_PrincipalAxisCached(false)
{ {
m_DataPoints[0] = p; m_DataPoints[0] = p;
m_PointBitString |= (1 << p.GetIdx()); m_PointBitString |= (1 << p.GetIdx());
} }
RGBAVector GetTotal() const { return m_Total; } RGBAVector GetTotal() const { return m_Total; }
const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; } const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; }
uint32 GetNumPoints() const { return m_NumPoints; } uint32 GetNumPoints() const { return m_NumPoints; }
RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); } RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); }
const RGBAVector *GetPoints() const { return m_DataPoints; } const RGBAVector *GetPoints() const { return m_DataPoints; }
void AddPoint(const RGBAVector &p); void AddPoint(const RGBAVector &p);
void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const { void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const {
Min = m_Min, Max = m_Max; Min = m_Min, Max = m_Max;
} }
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask. // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const; double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const;
// Returns the principal axis for this point cluster. // Returns the principal axis for this point cluster.
double GetPrincipalEigenvalue(); double GetPrincipalEigenvalue();
double GetSecondEigenvalue(); double GetSecondEigenvalue();
uint32 GetPowerMethodIterations(); uint32 GetPowerMethodIterations();
void GetPrincipalAxis(RGBADir &axis); void GetPrincipalAxis(RGBADir &axis);
bool AllSamePoint() const { return m_Max == m_Min; } bool AllSamePoint() const { return m_Max == m_Min; }
int GetPointBitString() const { return m_PointBitString; } int GetPointBitString() const { return m_PointBitString; }
private: private:
// The number of points in the cluster. // The number of points in the cluster.
uint32 m_NumPoints; uint32 m_NumPoints;
RGBAVector m_Total; RGBAVector m_Total;
// The points in the cluster. // The points in the cluster.
RGBAVector m_DataPoints[kMaxNumDataPoints]; RGBAVector m_DataPoints[kMaxNumDataPoints];
int m_PointBitString; int m_PointBitString;
RGBAVector m_Min, m_Max; RGBAVector m_Min, m_Max;
bool m_PrincipalAxisCached; bool m_PrincipalAxisCached;
double m_PrincipalEigenvalue; double m_PrincipalEigenvalue;
double m_SecondEigenvalue; double m_SecondEigenvalue;
uint32 m_PowerMethodIterations; uint32 m_PowerMethodIterations;
RGBADir m_PrincipalAxis; RGBADir m_PrincipalAxis;
}; };
extern uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit = -1); extern uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit = -1);

View file

@ -92,37 +92,37 @@ static inline uint32 popcnt32(uint32 x) {
/* Original scalar implementation: /* Original scalar implementation:
// If the mask is all the bits, then we can just return the value. // If the mask is all the bits, then we can just return the value.
if(mask == 0xFF) { if(mask == 0xFF) {
return val; return val;
} }
uint32 prec = CountBitsInMask(mask); uint32 prec = CountBitsInMask(mask);
const uint32 step = 1 << (8 - prec); const uint32 step = 1 << (8 - prec);
assert(step-1 == uint8(~mask)); assert(step-1 == uint8(~mask));
uint32 lval = val & mask; uint32 lval = val & mask;
uint32 hval = lval + step; uint32 hval = lval + step;
if(pBit >= 0) { if(pBit >= 0) {
prec++; prec++;
lval |= !!(pBit) << (8 - prec); lval |= !!(pBit) << (8 - prec);
hval |= !!(pBit) << (8 - prec); hval |= !!(pBit) << (8 - prec);
} }
if(lval > val) { if(lval > val) {
lval -= step; lval -= step;
hval -= step; hval -= step;
} }
lval |= lval >> prec; lval |= lval >> prec;
hval |= hval >> prec; hval |= hval >> prec;
if(sad(val, lval) < sad(val, hval)) if(sad(val, lval) < sad(val, hval))
return lval; return lval;
else else
return hval; return hval;
*/ */
// !TODO! AVX2 supports an instruction known as vsllv, which shifts a vector // !TODO! AVX2 supports an instruction known as vsllv, which shifts a vector
@ -158,114 +158,114 @@ static const ALIGN_SSE uint32 kThirtyTwoVector[4] = { 32, 32, 32, 32 };
static const __m128i kByteValMask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF); static const __m128i kByteValMask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF);
static inline __m128i sad(const __m128i &a, const __m128i &b) { static inline __m128i sad(const __m128i &a, const __m128i &b) {
const __m128i maxab = _mm_max_epu8(a, b); const __m128i maxab = _mm_max_epu8(a, b);
const __m128i minab = _mm_min_epu8(a, b); const __m128i minab = _mm_min_epu8(a, b);
return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) ); return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) );
} }
__m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const { __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {
// !SPEED! We should figure out a way to get rid of these scalar operations. // !SPEED! We should figure out a way to get rid of these scalar operations.
#ifdef HAS_SSE_POPCNT #ifdef HAS_SSE_POPCNT
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]); const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
#else #else
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
#endif #endif
assert(r >= 0.0f && r <= 255.0f); assert(r >= 0.0f && r <= 255.0f);
assert(g >= 0.0f && g <= 255.0f); assert(g >= 0.0f && g <= 255.0f);
assert(b >= 0.0f && b <= 255.0f); assert(b >= 0.0f && b <= 255.0f);
assert(a >= 0.0f && a <= 255.0f); assert(a >= 0.0f && a <= 255.0f);
assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]); assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]); assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) ); const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec ); const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
const __m128i &mask = qmask; const __m128i &mask = qmask;
__m128i lval = _mm_and_si128(val, mask); __m128i lval = _mm_and_si128(val, mask);
__m128i hval = _mm_add_epi32(lval, step); __m128i hval = _mm_add_epi32(lval, step);
const __m128i lvalShift = _mm_srli_epi32(lval, prec); const __m128i lvalShift = _mm_srli_epi32(lval, prec);
const __m128i hvalShift = _mm_srli_epi32(hval, prec); const __m128i hvalShift = _mm_srli_epi32(hval, prec);
lval = _mm_or_si128(lval, lvalShift); lval = _mm_or_si128(lval, lvalShift);
hval = _mm_or_si128(hval, hvalShift); hval = _mm_or_si128(hval, hvalShift);
const __m128i lvald = _mm_sub_epi32( val, lval ); const __m128i lvald = _mm_sub_epi32( val, lval );
const __m128i hvald = _mm_sub_epi32( hval, val ); const __m128i hvald = _mm_sub_epi32( hval, val );
const __m128i vd = _mm_cmplt_epi32(lvald, hvald); const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
__m128i ans = _mm_blendv_epi8(hval, lval, vd); __m128i ans = _mm_blendv_epi8(hval, lval, vd);
const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask); const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
ans = _mm_blendv_epi8( ans, val, chanExact ); ans = _mm_blendv_epi8( ans, val, chanExact );
return ans; return ans;
} }
__m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const { __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
// !SPEED! We should figure out a way to get rid of these scalar operations. // !SPEED! We should figure out a way to get rid of these scalar operations.
#ifdef HAS_SSE_POPCNT #ifdef HAS_SSE_POPCNT
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]); const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
#else #else
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
#endif #endif
assert(r >= 0.0f && r <= 255.0f); assert(r >= 0.0f && r <= 255.0f);
assert(g >= 0.0f && g <= 255.0f); assert(g >= 0.0f && g <= 255.0f);
assert(b >= 0.0f && b <= 255.0f); assert(b >= 0.0f && b <= 255.0f);
assert(a >= 0.0f && a <= 255.0f); assert(a >= 0.0f && a <= 255.0f);
assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]); assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]); assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) ); const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
const __m128i pbit = _mm_set1_epi32(!!pBit); const __m128i pbit = _mm_set1_epi32(!!pBit);
const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask); const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask);
const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec ); const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
__m128i lval = _mm_and_si128( val, mask ); __m128i lval = _mm_and_si128( val, mask );
__m128i hval = _mm_add_epi32( lval, step ); __m128i hval = _mm_add_epi32( lval, step );
const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec); const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec);
lval = _mm_or_si128(lval, pBitShifted ); lval = _mm_or_si128(lval, pBitShifted );
hval = _mm_or_si128(hval, pBitShifted); hval = _mm_or_si128(hval, pBitShifted);
// These next three lines we make sure that after adding the pbit that val is // These next three lines we make sure that after adding the pbit that val is
// still in between lval and hval. If it isn't, then we subtract a // still in between lval and hval. If it isn't, then we subtract a
// step from both. Now, val should be larger than lval and less than // step from both. Now, val should be larger than lval and less than
// hval, but certain situations make this not always the case (e.g. val // hval, but certain situations make this not always the case (e.g. val
// is 0, precision is 4 bits, and pbit is 1). Hence, we add back the // is 0, precision is 4 bits, and pbit is 1). Hence, we add back the
// step if it goes below zero, making it equivalent to hval and so it // step if it goes below zero, making it equivalent to hval and so it
// doesn't matter which we choose. // doesn't matter which we choose.
{ {
__m128i cmp = _mm_cmpgt_epi32(lval, val); __m128i cmp = _mm_cmpgt_epi32(lval, val);
cmp = _mm_mullo_epi32(cmp, step); cmp = _mm_mullo_epi32(cmp, step);
lval = _mm_add_epi32(lval, cmp); lval = _mm_add_epi32(lval, cmp);
hval = _mm_add_epi32(hval, cmp); hval = _mm_add_epi32(hval, cmp);
cmp = _mm_cmplt_epi32(lval, kZeroVector); cmp = _mm_cmplt_epi32(lval, kZeroVector);
cmp = _mm_mullo_epi32(cmp, step); cmp = _mm_mullo_epi32(cmp, step);
lval = _mm_sub_epi32(lval, cmp); lval = _mm_sub_epi32(lval, cmp);
} }
const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1); const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1);
const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1); const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1);
lval = _mm_or_si128(lval, lvalShift); lval = _mm_or_si128(lval, lvalShift);
hval = _mm_or_si128(hval, hvalShift); hval = _mm_or_si128(hval, hvalShift);
const __m128i lvald = _mm_sub_epi32( val, lval ); const __m128i lvald = _mm_sub_epi32( val, lval );
const __m128i hvald = _mm_sub_epi32( hval, val ); const __m128i hvald = _mm_sub_epi32( hval, val );
const __m128i vd = _mm_cmplt_epi32(lvald, hvald); const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
__m128i ans = _mm_blendv_epi8(hval, lval, vd); __m128i ans = _mm_blendv_epi8(hval, lval, vd);
const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask); const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
ans = _mm_blendv_epi8( ans, val, chanExact ); ans = _mm_blendv_epi8( ans, val, chanExact );
return ans; return ans;
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -276,17 +276,17 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
// Matrix-vector product. Each component of p is broadcast across a register
// and multiplied with the corresponding entry of col[]; the four partial
// products are then summed pairwise.
RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const {
  const __m128 xTerm = _mm_mul_ps( _mm_set1_ps( p.x ), col[0] );
  const __m128 yTerm = _mm_mul_ps( _mm_set1_ps( p.y ), col[1] );
  const __m128 zTerm = _mm_mul_ps( _mm_set1_ps( p.z ), col[2] );
  const __m128 wTerm = _mm_mul_ps( _mm_set1_ps( p.w ), col[3] );

  const __m128 xySum = _mm_add_ps( xTerm, yTerm );
  const __m128 zwSum = _mm_add_ps( zTerm, wTerm );
  return RGBAVectorSIMD( _mm_add_ps( xySum, zwSum ) );
}
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -297,104 +297,104 @@ RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const {
// Builds the union of two clusters that share no points: starts as a copy of
// left, then appends every point of right while updating the running total
// and the bounding box. The cached principal axis is invalidated.
RGBAClusterSIMD::RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right) {

  // The two clusters must not have any point index in common.
  assert(!(left.m_PointBitString & right.m_PointBitString));

  *this = left;

  int srcIdx = 0;
  while(srcIdx < right.m_NumPoints) {

    const RGBAVectorSIMD &pt = right.m_DataPoints[srcIdx];

    assert(m_NumPoints < kMaxNumDataPoints);
    m_Total += pt;
    m_DataPoints[m_NumPoints] = pt;
    m_NumPoints++;

    m_Min.vec = _mm_min_ps(m_Min.vec, pt.vec);
    m_Max.vec = _mm_max_ps(m_Max.vec, pt.vec);

    srcIdx++;
  }

  m_PointBitString = left.m_PointBitString | right.m_PointBitString;
  m_PrincipalAxisCached = false;
}
void RGBAClusterSIMD::AddPoint(const RGBAVectorSIMD &p, int idx) { void RGBAClusterSIMD::AddPoint(const RGBAVectorSIMD &p, int idx) {
assert(m_NumPoints < kMaxNumDataPoints); assert(m_NumPoints < kMaxNumDataPoints);
m_Total += p; m_Total += p;
m_DataPoints[m_NumPoints++] = p; m_DataPoints[m_NumPoints++] = p;
m_PointBitString |= 1 << idx; m_PointBitString |= 1 << idx;
m_Min.vec = _mm_min_ps(m_Min.vec, p.vec); m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
m_Max.vec = _mm_max_ps(m_Max.vec, p.vec); m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
} }
float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2], __m128i *indices) const { float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2], __m128i *indices) const {
// nBuckets should be a power of two. // nBuckets should be a power of two.
assert(!(nBuckets & (nBuckets - 1))); assert(!(nBuckets & (nBuckets - 1)));
#ifdef HAS_SSE_POPCNT #ifdef HAS_SSE_POPCNT
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF); const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
#else #else
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF); const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
#endif #endif
assert(indexPrec >= 2 && indexPrec <= 4); assert(indexPrec >= 2 && indexPrec <= 4);
typedef __m128i tInterpPair[2]; typedef __m128i tInterpPair[2];
typedef tInterpPair tInterpLevel[16]; typedef tInterpPair tInterpLevel[16];
const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1); const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1);
__m128i qp1, qp2; __m128i qp1, qp2;
if(pbits) { if(pbits) {
qp1 = p1.ToPixel(bitMask, pbits[0]); qp1 = p1.ToPixel(bitMask, pbits[0]);
qp2 = p2.ToPixel(bitMask, pbits[1]); qp2 = p2.ToPixel(bitMask, pbits[1]);
} }
else { else {
qp1 = p1.ToPixel(bitMask); qp1 = p1.ToPixel(bitMask);
qp2 = p2.ToPixel(bitMask); qp2 = p2.ToPixel(bitMask);
} }
__m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() ); __m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() );
__m128 totalError = kZero; __m128 totalError = kZero;
for(int i = 0; i < m_NumPoints; i++) { for(int i = 0; i < m_NumPoints; i++) {
const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask ); const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask );
__m128 minError = _mm_set1_ps(FLT_MAX); __m128 minError = _mm_set1_ps(FLT_MAX);
__m128i bestBucket = _mm_set1_epi32(-1); __m128i bestBucket = _mm_set1_epi32(-1);
for(int j = 0; j < nBuckets; j++) { for(int j = 0; j < nBuckets; j++) {
const __m128i jVec = _mm_set1_epi32(j); const __m128i jVec = _mm_set1_epi32(j);
const __m128i interp0 = (*interpVals)[j][0]; const __m128i interp0 = (*interpVals)[j][0];
const __m128i interp1 = (*interpVals)[j][1]; const __m128i interp1 = (*interpVals)[j][1];
const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 ); const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 );
const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 ); const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 );
const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) ); const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) );
const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel ); const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel );
__m128 errorVec = _mm_cvtepi32_ps( dist ); __m128 errorVec = _mm_cvtepi32_ps( dist );
errorVec = _mm_mul_ps( errorVec, errorMetricVec ); errorVec = _mm_mul_ps( errorVec, errorMetricVec );
errorVec = _mm_mul_ps( errorVec, errorVec ); errorVec = _mm_mul_ps( errorVec, errorVec );
errorVec = _mm_hadd_ps( errorVec, errorVec ); errorVec = _mm_hadd_ps( errorVec, errorVec );
errorVec = _mm_hadd_ps( errorVec, errorVec ); errorVec = _mm_hadd_ps( errorVec, errorVec );
const __m128 cmp = _mm_cmple_ps( errorVec, minError ); const __m128 cmp = _mm_cmple_ps( errorVec, minError );
minError = _mm_blendv_ps( minError, errorVec, cmp ); minError = _mm_blendv_ps( minError, errorVec, cmp );
bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) ); bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) );
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving // Conceptually, once the error starts growing, it doesn't stop growing (we're moving
// farther away from the reference point along the line). Hence we can early out here. // farther away from the reference point along the line). Hence we can early out here.
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer // However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
// about 0.01 RMS error. // about 0.01 RMS error.
if(!((uint8 *)(&cmp))[0]) if(!((uint8 *)(&cmp))[0])
break; break;
} }
totalError = _mm_add_ps(totalError, minError); totalError = _mm_add_ps(totalError, minError);
if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0]; if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0];
} }
return ((float *)(&totalError))[0]; return ((float *)(&totalError))[0];
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -404,69 +404,69 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Clamps every channel of both endpoints to the range [kZero, kByteMax].
void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2) {
  const __m128 p1Floored = _mm_max_ps( p1.vec, kZero );
  p1.vec = _mm_min_ps( kByteMax, p1Floored );

  const __m128 p2Floored = _mm_max_ps( p2.vec, kZero );
  p2.vec = _mm_min_ps( kByteMax, p2Floored );
}
void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis) { void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis) {
if(c.GetNumPoints() == 2) { if(c.GetNumPoints() == 2) {
axis = c.GetPoint(1) - c.GetPoint(0); axis = c.GetPoint(1) - c.GetPoint(0);
return; return;
} }
RGBAVectorSIMD avg = c.GetTotal(); RGBAVectorSIMD avg = c.GetTotal();
avg /= float(c.GetNumPoints()); avg /= float(c.GetNumPoints());
// We use these vectors for calculating the covariance matrix... // We use these vectors for calculating the covariance matrix...
RGBAVectorSIMD toPts[kMaxNumDataPoints]; RGBAVectorSIMD toPts[kMaxNumDataPoints];
RGBAVectorSIMD toPtsMax(-FLT_MAX); RGBAVectorSIMD toPtsMax(-FLT_MAX);
for(int i = 0; i < c.GetNumPoints(); i++) { for(int i = 0; i < c.GetNumPoints(); i++) {
toPts[i] = c.GetPoint(i) - avg; toPts[i] = c.GetPoint(i) - avg;
toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec); toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec);
} }
RGBAMatrixSIMD covMatrix; RGBAMatrixSIMD covMatrix;
// Compute covariance. // Compute covariance.
const float fNumPoints = float(c.GetNumPoints()); const float fNumPoints = float(c.GetNumPoints());
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
for(int j = 0; j <= i; j++) { for(int j = 0; j <= i; j++) {
float sum = 0.0; float sum = 0.0;
for(int k = 0; k < c.GetNumPoints(); k++) { for(int k = 0; k < c.GetNumPoints(); k++) {
sum += toPts[k].c[i] * toPts[k].c[j]; sum += toPts[k].c[i] * toPts[k].c[j];
} }
covMatrix(i, j) = sum / fNumPoints; covMatrix(i, j) = sum / fNumPoints;
covMatrix(j, i) = covMatrix(i, j); covMatrix(j, i) = covMatrix(i, j);
} }
} }
// !SPEED! Find eigenvectors by using the power method. This is good because the // !SPEED! Find eigenvectors by using the power method. This is good because the
// matrix is only 4x4, which allows us to use SIMD... // matrix is only 4x4, which allows us to use SIMD...
RGBAVectorSIMD b = toPtsMax; RGBAVectorSIMD b = toPtsMax;
assert(b.Length() > 0); assert(b.Length() > 0);
b /= b.Length(); b /= b.Length();
RGBAVectorSIMD newB = covMatrix * b; RGBAVectorSIMD newB = covMatrix * b;
// !HACK! If the principal eigenvector of the covariance matrix // !HACK! If the principal eigenvector of the covariance matrix
// converges to zero, that means that the points lie equally // converges to zero, that means that the points lie equally
// spaced on a sphere in this space. In this (extremely rare) // spaced on a sphere in this space. In this (extremely rare)
// situation, just choose a point and use it as the principal // situation, just choose a point and use it as the principal
// direction. // direction.
const float newBlen = newB.Length(); const float newBlen = newB.Length();
if(newBlen < 1e-10) { if(newBlen < 1e-10) {
axis = toPts[0]; axis = toPts[0];
return; return;
} }
for(int i = 0; i < 8; i++) { for(int i = 0; i < 8; i++) {
newB = covMatrix * b; newB = covMatrix * b;
newB.Normalize(); newB.Normalize();
b = newB; b = newB;
} }
axis = b; axis = b;
} }

View file

@ -81,270 +81,270 @@ static const __m128 kEpsilonSIMD = _mm_set1_ps(1e-8f);
class RGBAVectorSIMD { class RGBAVectorSIMD {
public: public:
union { union {
struct { float r, g, b, a; }; struct { float r, g, b, a; };
struct { float x, y, z, w; }; struct { float x, y, z, w; };
float c[4]; float c[4];
__m128 vec; __m128 vec;
}; };
RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { } RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
RGBAVectorSIMD(uint32 pixel) : RGBAVectorSIMD(uint32 pixel) :
r(float(pixel & 0xFF)), r(float(pixel & 0xFF)),
g(float((pixel >> 8) & 0xFF)), g(float((pixel >> 8) & 0xFF)),
b(float((pixel >> 16) & 0xFF)), b(float((pixel >> 16) & 0xFF)),
a(float((pixel >> 24) & 0xFF)) a(float((pixel >> 24) & 0xFF))
{ } { }
explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) : explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) :
r(_r), g(_g), b(_b), a(_a) { } r(_r), g(_g), b(_b), a(_a) { }
explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { } explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { }
RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { } RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { }
RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { } RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { }
RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const { RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const {
return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) ); return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) );
} }
RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) { RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) {
this->vec = _mm_add_ps(this->vec, p.vec); this->vec = _mm_add_ps(this->vec, p.vec);
return *this; return *this;
} }
RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const { RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const {
return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) ); return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) );
} }
RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) { RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) {
this->vec = _mm_sub_ps(this->vec, p.vec); this->vec = _mm_sub_ps(this->vec, p.vec);
return *this; return *this;
} }
RGBAVectorSIMD operator /(const float s) const { RGBAVectorSIMD operator /(const float s) const {
return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) ); return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) );
} }
RGBAVectorSIMD &operator /=(const float s) { RGBAVectorSIMD &operator /=(const float s) {
this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) ); this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) );
return *this; return *this;
} }
float operator *(const RGBAVectorSIMD &p) const { float operator *(const RGBAVectorSIMD &p) const {
__m128 mul = _mm_mul_ps(this->vec, p.vec); __m128 mul = _mm_mul_ps(this->vec, p.vec);
mul = _mm_hadd_ps(mul, mul); mul = _mm_hadd_ps(mul, mul);
mul = _mm_hadd_ps(mul, mul); mul = _mm_hadd_ps(mul, mul);
return ((float *)(&mul))[0]; return ((float *)(&mul))[0];
} }
void Normalize() { void Normalize() {
__m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) ); __m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) );
vec = _mm_mul_ps( vec, rsqrt ); vec = _mm_mul_ps( vec, rsqrt );
} }
float Length() const { float Length() const {
return sqrt((*this) * (*this)); return sqrt((*this) * (*this));
} }
RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) { RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) {
this->vec = _mm_mul_ps(this->vec, v.vec); this->vec = _mm_mul_ps(this->vec, v.vec);
return *this; return *this;
} }
RGBAVectorSIMD operator *(const float s) const { RGBAVectorSIMD operator *(const float s) const {
return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) ); return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) );
} }
friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) { friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) {
return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) ); return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) );
} }
RGBAVectorSIMD &operator *=(const float s) { RGBAVectorSIMD &operator *=(const float s) {
this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) ); this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) );
return *this; return *this;
} }
float &operator [](const int i) { float &operator [](const int i) {
return c[i]; return c[i];
} }
friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) { friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
__m128 d = _mm_sub_ps(rhs.vec, lhs.vec); __m128 d = _mm_sub_ps(rhs.vec, lhs.vec);
d = _mm_mul_ps(d, d); d = _mm_mul_ps(d, d);
__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD); __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
cmp = _mm_hadd_ps(cmp, cmp); cmp = _mm_hadd_ps(cmp, cmp);
cmp = _mm_hadd_ps(cmp, cmp); cmp = _mm_hadd_ps(cmp, cmp);
return ((float *)(&cmp))[0] == 0.0f; return ((float *)(&cmp))[0] == 0.0f;
} }
friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) { friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
return !(rhs == lhs); return !(rhs == lhs);
} }
operator float *() { operator float *() {
return c; return c;
} }
// Quantize this point. // Quantize this point.
__m128i ToPixel(const __m128i &channelMask, const int pBit) const; __m128i ToPixel(const __m128i &channelMask, const int pBit) const;
__m128i ToPixel(const __m128i &channelMask) const; __m128i ToPixel(const __m128i &channelMask) const;
}; };
class RGBAMatrixSIMD { class RGBAMatrixSIMD {
private: private:
union { union {
float m[kNumColorChannels*kNumColorChannels]; float m[kNumColorChannels*kNumColorChannels];
struct { struct {
float m1, m5, m9, m13; float m1, m5, m9, m13;
float m2, m6, m10, m14; float m2, m6, m10, m14;
float m3, m7, m11, m15; float m3, m7, m11, m15;
float m4, m8, m12, m16; float m4, m8, m12, m16;
}; };
__m128 col[kNumColorChannels]; __m128 col[kNumColorChannels];
}; };
RGBAMatrixSIMD(const float *arr) { RGBAMatrixSIMD(const float *arr) {
memcpy(m, arr, sizeof(m)); memcpy(m, arr, sizeof(m));
} }
RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) { RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) {
for(int i = 0; i < kNumColorChannels; i++) for(int i = 0; i < kNumColorChannels; i++)
col[i] = newcol[i]; col[i] = newcol[i];
} }
public: public:
RGBAMatrixSIMD() : RGBAMatrixSIMD() :
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f), m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f), m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f), m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f) m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
{ } { }
RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) { RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) {
memcpy(m, other.m, sizeof(m)); memcpy(m, other.m, sizeof(m));
return (*this); return (*this);
} }
RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const { RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const {
RGBAMatrixSIMD newm; RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_add_ps(col[i], p.col[i]); newm.col[i] = _mm_add_ps(col[i], p.col[i]);
} }
return newm; return newm;
} }
RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) { RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) {
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_add_ps( col[i], p.col[i] ); col[i] = _mm_add_ps( col[i], p.col[i] );
} }
return *this; return *this;
} }
RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const { RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const {
RGBAMatrixSIMD newm; RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_sub_ps( col[i], p.col[i] ); newm.col[i] = _mm_sub_ps( col[i], p.col[i] );
} }
return newm; return newm;
} }
RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) { RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) {
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_sub_ps( col[i], p.col[i] ); col[i] = _mm_sub_ps( col[i], p.col[i] );
} }
return *this; return *this;
} }
RGBAMatrixSIMD operator /(const float s) const { RGBAMatrixSIMD operator /(const float s) const {
__m128 f = _mm_set1_ps(s); __m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD newm; RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_div_ps( col[i], f ); newm.col[i] = _mm_div_ps( col[i], f );
} }
return newm; return newm;
} }
RGBAMatrixSIMD &operator /=(const float s) { RGBAMatrixSIMD &operator /=(const float s) {
__m128 f = _mm_set1_ps(s); __m128 f = _mm_set1_ps(s);
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_div_ps(col[i], f); col[i] = _mm_div_ps(col[i], f);
} }
return *this; return *this;
} }
RGBAMatrixSIMD operator *(const float s) const { RGBAMatrixSIMD operator *(const float s) const {
__m128 f = _mm_set1_ps(s); __m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD newm; RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_mul_ps( col[i], f ); newm.col[i] = _mm_mul_ps( col[i], f );
} }
return newm; return newm;
} }
friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) { friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) {
__m128 f = _mm_set1_ps(s); __m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD newm; RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_mul_ps( p.col[i], f ); newm.col[i] = _mm_mul_ps( p.col[i], f );
} }
return newm; return newm;
} }
RGBAMatrixSIMD &operator *=(const float s) { RGBAMatrixSIMD &operator *=(const float s) {
__m128 f = _mm_set1_ps(s); __m128 f = _mm_set1_ps(s);
for(int i = 0; i < kNumColorChannels; i++) for(int i = 0; i < kNumColorChannels; i++)
col[i] = _mm_mul_ps(col[i], f); col[i] = _mm_mul_ps(col[i], f);
return *this; return *this;
} }
float &operator ()(const int i, const int j) { float &operator ()(const int i, const int j) {
return (*this)[j*4 + i]; return (*this)[j*4 + i];
} }
float &operator [](const int i) { float &operator [](const int i) {
return m[i]; return m[i];
} }
friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) { friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) {
__m128 sum = _mm_set1_ps(0.0f); __m128 sum = _mm_set1_ps(0.0f);
for(int i = 0; i < kNumColorChannels; i++) { for(int i = 0; i < kNumColorChannels; i++) {
__m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]); __m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]);
d = _mm_mul_ps(d, d); d = _mm_mul_ps(d, d);
__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD); __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
cmp = _mm_hadd_ps(cmp, cmp); cmp = _mm_hadd_ps(cmp, cmp);
cmp = _mm_hadd_ps(cmp, cmp); cmp = _mm_hadd_ps(cmp, cmp);
sum = _mm_add_ps(sum, cmp); sum = _mm_add_ps(sum, cmp);
} }
if(((float *)(&sum))[0] != 0) if(((float *)(&sum))[0] != 0)
return false; return false;
else else
return true; return true;
} }
operator float *() { operator float *() {
return m; return m;
} }
RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const; RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const;
}; };
class RGBADirSIMD : public RGBAVectorSIMD { class RGBADirSIMD : public RGBAVectorSIMD {
public: public:
RGBADirSIMD() : RGBAVectorSIMD() { } RGBADirSIMD() : RGBAVectorSIMD() { }
RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) { RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) {
this->Normalize(); this->Normalize();
} }
}; };
// Clamps every channel of both endpoints to the range [kZero, kByteMax].
@ -353,69 +353,69 @@ extern void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2);
class RGBAClusterSIMD { class RGBAClusterSIMD {
public: public:
RGBAClusterSIMD() : RGBAClusterSIMD() :
m_NumPoints(0), m_Total(0.0f), m_NumPoints(0), m_Total(0.0f),
m_PointBitString(0), m_PointBitString(0),
m_Min(FLT_MAX), m_Min(FLT_MAX),
m_Max(-FLT_MAX), m_Max(-FLT_MAX),
m_PrincipalAxisCached(false) m_PrincipalAxisCached(false)
{ } { }
RGBAClusterSIMD(const RGBAClusterSIMD &c) : RGBAClusterSIMD(const RGBAClusterSIMD &c) :
m_NumPoints(c.m_NumPoints), m_NumPoints(c.m_NumPoints),
m_Total(c.m_Total), m_Total(c.m_Total),
m_PointBitString(c.m_PointBitString), m_PointBitString(c.m_PointBitString),
m_Min(c.m_Min), m_Min(c.m_Min),
m_Max(c.m_Max), m_Max(c.m_Max),
m_PrincipalAxisCached(false) m_PrincipalAxisCached(false)
{ {
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD)); memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD));
} }
RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right); RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right);
RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) : RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) :
m_NumPoints(1), m_NumPoints(1),
m_Total(p), m_Total(p),
m_PointBitString(0), m_PointBitString(0),
m_Min(p), m_Max(p), m_Min(p), m_Max(p),
m_PrincipalAxisCached(false) m_PrincipalAxisCached(false)
{ {
m_DataPoints[0] = p; m_DataPoints[0] = p;
m_PointBitString |= (1 << idx); m_PointBitString |= (1 << idx);
} }
RGBAVectorSIMD GetTotal() const { return m_Total; } RGBAVectorSIMD GetTotal() const { return m_Total; }
const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; } const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; }
int GetNumPoints() const { return m_NumPoints; } int GetNumPoints() const { return m_NumPoints; }
RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); } RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); }
void AddPoint(const RGBAVectorSIMD &p, int idx); void AddPoint(const RGBAVectorSIMD &p, int idx);
void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const { void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const {
Min = m_Min, Max = m_Max; Min = m_Min, Max = m_Max;
} }
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask. // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const; float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const;
bool AllSamePoint() const { return m_Max == m_Min; } bool AllSamePoint() const { return m_Max == m_Min; }
int GetPointBitString() const { return m_PointBitString; } int GetPointBitString() const { return m_PointBitString; }
private: private:
// The number of points in the cluster. // The number of points in the cluster.
int m_NumPoints; int m_NumPoints;
RGBAVectorSIMD m_Total; RGBAVectorSIMD m_Total;
// The points in the cluster. // The points in the cluster.
RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints]; RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints];
RGBAVectorSIMD m_Min, m_Max; RGBAVectorSIMD m_Min, m_Max;
int m_PointBitString; int m_PointBitString;
RGBADirSIMD m_PrincipalAxis; RGBADirSIMD m_PrincipalAxis;
bool m_PrincipalAxisCached; bool m_PrincipalAxisCached;
}; };
extern void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis); extern void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis);

View file

@ -64,18 +64,18 @@ void PrintUsage() {
} }
// Copies `filename` without its final extension into `buf` as a
// NUL-terminated string, truncating to at most bufSz - 1 characters.
//
// The extension is the text from the last '.' of the final path component
// onwards. A '.' is not treated as an extension separator when it is the
// first character of the string or directly follows a path separator, and
// the search never crosses a '/' or '\\'. When there is no extension the
// whole name is copied, so `buf` is always written.
//
// Nothing is written when buf is NULL or bufSz is 0. A NULL filename yields
// an empty string.
void ExtractBasename(const char *filename, char *buf, uint32 bufSz) {
  if(buf == NULL || bufSz == 0) {
    return;
  }

  if(filename == NULL) {
    buf[0] = '\0';
    return;
  }

  const size_t len = strlen(filename);

  // Walk backwards from the last character down to index 1 looking for the
  // extension dot. Index 0 is never examined, which also keeps the scan
  // inside the string when it is empty or a single character.
  size_t stemLen = len;
  for(size_t i = len; i-- > 1; ) {
    const char ch = filename[i];
    if(ch == '/' || ch == '\\') {
      // Reached the directory part: the final component has no extension.
      break;
    }

    if(ch == '.') {
      const char prev = filename[i - 1];
      if(prev != '/' && prev != '\\') {
        stemLen = i;
      }
      break;
    }
  }

  // Leave room for the terminator.
  const size_t maxChars = size_t(bufSz) - 1;
  const size_t toCopy = (stemLen > maxChars)? maxChars : stemLen;
  memcpy(buf, filename, toCopy);
  buf[toCopy] = '\0';
}
int _tmain(int argc, _TCHAR* argv[]) int _tmain(int argc, _TCHAR* argv[])
@ -175,7 +175,7 @@ int _tmain(int argc, _TCHAR* argv[])
if(numThreads > 1 && bSaveLog) { if(numThreads > 1 && bSaveLog) {
bSaveLog = false; bSaveLog = false;
fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n" fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n"); "If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
} }
if(fileArg == argc) { if(fileArg == argc) {
@ -183,16 +183,16 @@ int _tmain(int argc, _TCHAR* argv[])
exit(1); exit(1);
} }
char basename[256]; char basename[256];
ExtractBasename(argv[fileArg], basename, 256); ExtractBasename(argv[fileArg], basename, 256);
ImageFile file (argv[fileArg]); ImageFile file (argv[fileArg]);
if(!file.Load()) { if(!file.Load()) {
fprintf(stderr, "Error loading file: %s\n", argv[fileArg]); fprintf(stderr, "Error loading file: %s\n", argv[fileArg]);
return 1; return 1;
} }
const Image *img = file.GetImage(); const Image *img = file.GetImage();
int numBlocks = (img->GetWidth() * img->GetHeight())/16; int numBlocks = (img->GetWidth() * img->GetHeight())/16;
BlockStatManager *statManager = NULL; BlockStatManager *statManager = NULL;
@ -224,14 +224,14 @@ int _tmain(int argc, _TCHAR* argv[])
} }
if(bSaveLog) { if(bSaveLog) {
strcat_s(basename, ".log"); strcat_s(basename, ".log");
statManager->ToFile(basename); statManager->ToFile(basename);
basename[strlen(basename) - 4] = '\0'; basename[strlen(basename) - 4] = '\0';
} }
strcat_s(basename, "-bc7.png"); strcat_s(basename, "-bc7.png");
Image cImg (*ci); Image cImg (*ci);
ImageFile cImgFile (basename, eFileFormat_PNG, cImg); ImageFile cImgFile (basename, eFileFormat_PNG, cImg);
cImgFile.Write(); cImgFile.Write();
// Cleanup // Cleanup
delete ci; delete ci;

View file

@ -54,7 +54,7 @@ class ImageLoader;
class Image { class Image {
public: public:
Image(const CompressedImage &); Image(const CompressedImage &);
Image(const ImageLoader &); Image(const ImageLoader &);
~Image(); ~Image();

View file

@ -165,8 +165,8 @@ BlockStatManager::~BlockStatManager() {
if(m_Mutex) if(m_Mutex)
{ {
delete m_Mutex; delete m_Mutex;
m_Mutex = 0; m_Mutex = 0;
} }
} }
@ -206,15 +206,15 @@ void BlockStatManager::ToFile(const CHAR *filename) {
CHAR str[256]; CHAR str[256];
#ifdef _MSC_VER #ifdef _MSC_VER
_sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr); _sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr);
#else #else
snprintf(str, 256, "%d,%s\n", i, statStr); snprintf(str, 256, "%d,%s\n", i, statStr);
#endif #endif
uint32 strLen = uint32(strlen(str)); uint32 strLen = uint32(strlen(str));
if(strLen > 255) { if(strLen > 255) {
str[255] = '\n'; str[255] = '\n';
strLen = 256; strLen = 256;
} }
fstr.Write((uint8 *)str, strLen); fstr.Write((uint8 *)str, strLen);

View file

@ -75,11 +75,11 @@ CompressedImage::CompressedImage(
const ECompressionFormat format, const ECompressionFormat format,
const unsigned char *data const unsigned char *data
) )
: m_Width(width) : m_Width(width)
, m_Height(height) , m_Height(height)
, m_Format(format) , m_Format(format)
, m_Data(0) , m_Data(0)
, m_DataSz(0) , m_DataSz(0)
{ {
InitData(data); InitData(data);
} }

View file

@ -95,14 +95,14 @@ Image::Image(const CompressedImage &ci)
: m_Width(ci.GetWidth()) : m_Width(ci.GetWidth())
, m_Height(ci.GetHeight()) , m_Height(ci.GetHeight())
{ {
unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4; unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4;
m_PixelData = new uint8[ bufSz ]; m_PixelData = new uint8[ bufSz ];
if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; } if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; }
if(!ci.DecompressImage(m_PixelData, bufSz)) { if(!ci.DecompressImage(m_PixelData, bufSz)) {
fprintf(stderr, "Error decompressing image!\n"); fprintf(stderr, "Error decompressing image!\n");
return; return;
} }
} }
Image::Image(const ImageLoader &loader) Image::Image(const ImageLoader &loader)

View file

@ -74,23 +74,23 @@ class StopWatchImpl;
class StopWatch class StopWatch
{ {
public: public:
StopWatch(); StopWatch();
StopWatch(const StopWatch &); StopWatch(const StopWatch &);
~StopWatch(); ~StopWatch();
StopWatch &operator=(const StopWatch &); StopWatch &operator=(const StopWatch &);
void Start(); void Start();
void Stop(); void Stop();
void Reset(); void Reset();
double TimeInSeconds() const; double TimeInSeconds() const;
double TimeInMilliseconds() const; double TimeInMilliseconds() const;
double TimeInMicroseconds() const; double TimeInMicroseconds() const;
private: private:
StopWatchImpl *impl; StopWatchImpl *impl;
}; };
#endif // __TEXCOMP_STOP_WATCH_H__ #endif // __TEXCOMP_STOP_WATCH_H__

View file

@ -404,5 +404,5 @@ bool CompressImageData(
} }
void YieldThread() { void YieldThread() {
TCThread::Yield(); TCThread::Yield();
} }

View file

@ -115,7 +115,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
, m_CompressedBlockSize( , m_CompressedBlockSize(
(func == BC7C::Compress (func == BC7C::Compress
#ifdef HAS_SSE_41 #ifdef HAS_SSE_41
|| func == BC7C::CompressImageBC7SIMD || func == BC7C::CompressImageBC7SIMD
#endif #endif
)? )?
16 16
@ -125,7 +125,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
, m_UncompressedBlockSize( , m_UncompressedBlockSize(
(func == BC7C::Compress (func == BC7C::Compress
#ifdef HAS_SSE_41 #ifdef HAS_SSE_41
|| func == BC7C::CompressImageBC7SIMD || func == BC7C::CompressImageBC7SIMD
#endif #endif
)? )?
64 64

View file

@ -81,40 +81,39 @@ void WorkerThread::operator()() {
bool quitFlag = false; bool quitFlag = false;
while(!quitFlag) { while(!quitFlag) {
switch(m_Parent->AcceptThreadData(m_ThreadIdx)) switch(m_Parent->AcceptThreadData(m_ThreadIdx)) {
{
case eAction_Quit: case eAction_Quit:
{ {
quitFlag = true; quitFlag = true;
break; break;
} }
case eAction_Wait: case eAction_Wait:
{ {
TCThread::Yield(); TCThread::Yield();
break; break;
} }
case eAction_DoWork: case eAction_DoWork:
{ {
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx); const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx); uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4); CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
if(f) if(f)
(*f)(cj); (*f)(cj);
else else
(*fStat)(cj, *statManager); (*fStat)(cj, *statManager);
break; break;
} }
default: default:
{ {
fprintf(stderr, "Unrecognized thread command!\n"); fprintf(stderr, "Unrecognized thread command!\n");
quitFlag = true; quitFlag = true;
break; break;
} }
} }
} }
@ -244,10 +243,10 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
if(m_NextBlock == totalBlocks) { if(m_NextBlock == totalBlocks) {
if(m_NumCompressions < m_TotalNumCompressions) { if(m_NumCompressions < m_TotalNumCompressions) {
if(++m_WaitingThreads == m_ActiveThreads) { if(++m_WaitingThreads == m_ActiveThreads) {
m_NextBlock = 0; m_NextBlock = 0;
m_WaitingThreads = 0; m_WaitingThreads = 0;
} else { } else {
return WorkerThread::eAction_Wait; return WorkerThread::eAction_Wait;
} }
} }
else { else {

View file

@ -1,3 +1,55 @@
/* FasTC
* Copyright (c) 2012 University of North Carolina at Chapel Hill.
* All rights reserved.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for educational, research, and non-profit purposes, without
* fee, and without a written agreement is hereby granted, provided that the
* above copyright notice, this paragraph, and the following four paragraphs
* appear in all copies.
*
* Permission to incorporate this software into commercial products may be
* obtained by contacting the authors or the Office of Technology Development
* at the University of North Carolina at Chapel Hill <otd@unc.edu>.
*
* This software program and documentation are copyrighted by the University of
* North Carolina at Chapel Hill. The software program and documentation are
* supplied "as is," without any accompanying services from the University of
* North Carolina at Chapel Hill or the authors. The University of North
* Carolina at Chapel Hill and the authors do not warrant that the operation of
* the program will be uninterrupted or error-free. The end-user understands
* that the program was developed for research purposes and is advised not to
* rely exclusively on the program for any reason.
*
* IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
* AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
* THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
* AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
* DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY
* STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
* AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
* THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
* ENHANCEMENTS, OR MODIFICATIONS.
*
* Please send all BUG REPORTS to <pavel@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Pavel Krajcevski
* Dept of Computer Science
* 201 S Columbia St
* Frederick P. Brooks, Jr. Computer Science Bldg
* Chapel Hill, NC 27599-3175
* USA
*
* <http://gamma.cs.unc.edu/FasTC/>
*/
#include "FileStream.h" #include "FileStream.h"
#include <Windows.h> #include <Windows.h>
@ -54,7 +106,7 @@ public:
: m_ReferenceCount(1) : m_ReferenceCount(1)
{ {
DWORD dwDesiredAccess = GENERIC_READ; DWORD dwDesiredAccess = GENERIC_READ;
DWORD dwOpenAction = OPEN_EXISTING; DWORD dwOpenAction = OPEN_EXISTING;
switch(mode) { switch(mode) {
default: default:
@ -71,13 +123,13 @@ public:
case eFileMode_WriteAppend: case eFileMode_WriteAppend:
case eFileMode_WriteBinaryAppend: case eFileMode_WriteBinaryAppend:
dwDesiredAccess = FILE_APPEND_DATA; dwDesiredAccess = FILE_APPEND_DATA;
dwOpenAction = CREATE_NEW; dwOpenAction = CREATE_NEW;
break; break;
} }
m_Handle = CreateFile(filename, dwDesiredAccess, 0, NULL, dwOpenAction, FILE_ATTRIBUTE_NORMAL, NULL); m_Handle = CreateFile(filename, dwDesiredAccess, 0, NULL, dwOpenAction, FILE_ATTRIBUTE_NORMAL, NULL);
if(m_Handle == INVALID_HANDLE_VALUE) { if(m_Handle == INVALID_HANDLE_VALUE) {
ErrorExit(TEXT("CreateFile")); ErrorExit(TEXT("CreateFile"));
} }
} }
@ -145,15 +197,14 @@ FileStream::~FileStream() {
int32 FileStream::Read(uint8 *buf, uint32 bufSz) { int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
if( if(m_Mode == eFileMode_Write ||
m_Mode == eFileMode_Write ||
m_Mode == eFileMode_WriteBinary || m_Mode == eFileMode_WriteBinary ||
m_Mode == eFileMode_WriteAppend || m_Mode == eFileMode_WriteAppend ||
m_Mode == eFileMode_WriteBinaryAppend m_Mode == eFileMode_WriteBinaryAppend
) { ) {
CHAR errStr[256]; CHAR errStr[256];
_sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename); _sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename);
OutputDebugString(errStr); OutputDebugString(errStr);
return -2; return -2;
} }
@ -163,27 +214,27 @@ int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
DWORD oldPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT); DWORD oldPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
if(INVALID_SET_FILE_POINTER == oldPosition) { if(INVALID_SET_FILE_POINTER == oldPosition) {
CHAR errStr[256]; CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError()); _sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr); OutputDebugString(errStr);
return -1; return -1;
} }
DWORD amtRead; DWORD amtRead;
BOOL success = ReadFile(fp, buf, bufSz, &amtRead, NULL); BOOL success = ReadFile(fp, buf, bufSz, &amtRead, NULL);
if(!success) { if(!success) {
CHAR errStr[256]; CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename); _sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename);
OutputDebugString(errStr); OutputDebugString(errStr);
return -1; return -1;
} }
DWORD newPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT); DWORD newPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
if(INVALID_SET_FILE_POINTER == newPosition) { if(INVALID_SET_FILE_POINTER == newPosition) {
CHAR errStr[256]; CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError()); _sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr); OutputDebugString(errStr);
return -1; return -1;
} }
return newPosition - oldPosition; return newPosition - oldPosition;
@ -194,9 +245,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
m_Mode == eFileMode_Read || m_Mode == eFileMode_Read ||
m_Mode == eFileMode_ReadBinary m_Mode == eFileMode_ReadBinary
) { ) {
CHAR errStr[256]; CHAR errStr[256];
_sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename); _sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename);
OutputDebugString(errStr); OutputDebugString(errStr);
return -2; return -2;
} }
@ -213,10 +264,10 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
} }
if(INVALID_SET_FILE_POINTER == dwPos) { if(INVALID_SET_FILE_POINTER == dwPos) {
CHAR errStr[256]; CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError()); _sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr); OutputDebugString(errStr);
return -1; return -1;
} }
while(!LockFile(fp, dwPos, 0, bufSz, 0)) Sleep(1); while(!LockFile(fp, dwPos, 0, bufSz, 0)) Sleep(1);
@ -227,9 +278,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
UnlockFile(fp, dwPos, 0, bufSz, 0); UnlockFile(fp, dwPos, 0, bufSz, 0);
if(!success) { if(!success) {
CHAR errStr[256]; CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename); _sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename);
OutputDebugString(errStr); OutputDebugString(errStr);
return -1; return -1;
} }
@ -244,10 +295,10 @@ int32 FileStream::Tell() {
DWORD pos = SetFilePointer(fp, 0, NULL, FILE_CURRENT); DWORD pos = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
if(INVALID_SET_FILE_POINTER == pos) { if(INVALID_SET_FILE_POINTER == pos) {
CHAR errStr[256]; CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError()); _sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr); OutputDebugString(errStr);
return -1; return -1;
} }
return pos; return pos;
@ -264,17 +315,17 @@ bool FileStream::Seek(uint32 offset, ESeekPosition pos) {
DWORD origin = FILE_BEGIN; DWORD origin = FILE_BEGIN;
switch(pos) { switch(pos) {
default: default:
case eSeekPosition_Beginning: case eSeekPosition_Beginning:
// Do nothing // Do nothing
break; break;
case eSeekPosition_Current: case eSeekPosition_Current:
origin = FILE_CURRENT; origin = FILE_CURRENT;
break; break;
case eSeekPosition_End: case eSeekPosition_End:
origin = FILE_END; origin = FILE_END;
break; break;
} }

View file

@ -172,54 +172,54 @@ bool ImageLoader::LoadImage() {
// For each block, visit the pixels in sequential order // For each block, visit the pixels in sequential order
for(uint32 y = i; y < i+4; y++) { for(uint32 y = i; y < i+4; y++) {
for(uint32 x = j; x < j+4; x++) { for(uint32 x = j; x < j+4; x++) {
if(y >= m_Height || x >= m_Width) { if(y >= m_Height || x >= m_Width) {
m_PixelData[byteIdx++] = 0; // r m_PixelData[byteIdx++] = 0; // r
m_PixelData[byteIdx++] = 0; // g m_PixelData[byteIdx++] = 0; // g
m_PixelData[byteIdx++] = 0; // b m_PixelData[byteIdx++] = 0; // b
m_PixelData[byteIdx++] = 0; // a m_PixelData[byteIdx++] = 0; // a
continue; continue;
} }
unsigned int redVal = GetChannelForPixel(x, y, 0); unsigned int redVal = GetChannelForPixel(x, y, 0);
if(redVal == INT_MAX) if(redVal == INT_MAX)
return false; return false;
unsigned int greenVal = redVal; unsigned int greenVal = redVal;
unsigned int blueVal = redVal; unsigned int blueVal = redVal;
if(GetGreenChannelPrecision() > 0) { if(GetGreenChannelPrecision() > 0) {
greenVal = GetChannelForPixel(x, y, 1); greenVal = GetChannelForPixel(x, y, 1);
if(greenVal == INT_MAX) if(greenVal == INT_MAX)
return false; return false;
} }
if(GetBlueChannelPrecision() > 0) { if(GetBlueChannelPrecision() > 0) {
blueVal = GetChannelForPixel(x, y, 2); blueVal = GetChannelForPixel(x, y, 2);
if(blueVal == INT_MAX) if(blueVal == INT_MAX)
return false; return false;
} }
unsigned int alphaVal = 0xFF; unsigned int alphaVal = 0xFF;
if(GetAlphaChannelPrecision() > 0) { if(GetAlphaChannelPrecision() > 0) {
alphaVal = GetChannelForPixel(x, y, 3); alphaVal = GetChannelForPixel(x, y, 3);
if(alphaVal == INT_MAX) if(alphaVal == INT_MAX)
return false; return false;
} }
// Red channel // Red channel
m_PixelData[byteIdx++] = redVal & 0xFF; m_PixelData[byteIdx++] = redVal & 0xFF;
// Green channel // Green channel
m_PixelData[byteIdx++] = greenVal & 0xFF; m_PixelData[byteIdx++] = greenVal & 0xFF;
// Blue channel // Blue channel
m_PixelData[byteIdx++] = blueVal & 0xFF; m_PixelData[byteIdx++] = blueVal & 0xFF;
// Alpha channel // Alpha channel
m_PixelData[byteIdx++] = alphaVal & 0xFF; m_PixelData[byteIdx++] = alphaVal & 0xFF;
} }
} }
} }
} }

View file

@ -54,10 +54,8 @@ static void ReportError(const char *msg) {
class PNGStreamReader { class PNGStreamReader {
public: public:
static void ReadDataFromStream( static void ReadDataFromStream(png_structp png_ptr,
png_structp png_ptr, png_bytep outBytes, png_size_t byteCountToRead
png_bytep outBytes,
png_size_t byteCountToRead
) { ) {
png_voidp io_ptr = png_get_io_ptr( png_ptr ); png_voidp io_ptr = png_get_io_ptr( png_ptr );
if( io_ptr == NULL ) { if( io_ptr == NULL ) {
@ -120,9 +118,9 @@ bool ImageLoaderPNG::ReadData() {
int colorType = -1; int colorType = -1;
if( 1 != png_get_IHDR(png_ptr, info_ptr, if( 1 != png_get_IHDR(png_ptr, info_ptr,
(png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height), (png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height),
&bitDepth, &colorType, &bitDepth, &colorType,
NULL, NULL, NULL) NULL, NULL, NULL)
) { ) {
ReportError("Could not read PNG header"); ReportError("Could not read PNG header");
png_destroy_read_struct(&png_ptr, NULL, NULL); png_destroy_read_struct(&png_ptr, NULL, NULL);
@ -140,33 +138,33 @@ bool ImageLoaderPNG::ReadData() {
png_bytep rowData = new png_byte[bpr]; png_bytep rowData = new png_byte[bpr];
switch(colorType) { switch(colorType) {
default: default:
case PNG_COLOR_TYPE_PALETTE: case PNG_COLOR_TYPE_PALETTE:
ReportError("PNG color type unsupported"); ReportError("PNG color type unsupported");
png_destroy_read_struct(&png_ptr, NULL, NULL); png_destroy_read_struct(&png_ptr, NULL, NULL);
return false; return false;
case PNG_COLOR_TYPE_GRAY: { case PNG_COLOR_TYPE_GRAY: {
m_RedChannelPrecision = bitDepth; m_RedChannelPrecision = bitDepth;
m_RedData = new unsigned char[numPixels]; m_RedData = new unsigned char[numPixels];
for(uint32 i = 0; i < m_Height; i++) { for(uint32 i = 0; i < m_Height; i++) {
png_read_row(png_ptr, rowData, NULL); png_read_row(png_ptr, rowData, NULL);
unsigned int rowOffset = i * m_Width; unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0; unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) { for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++]; m_RedData[rowOffset + j] = rowData[byteIdx++];
} }
assert(byteIdx == bpr); assert(byteIdx == bpr);
} }
} }
break; break;
case PNG_COLOR_TYPE_RGB: case PNG_COLOR_TYPE_RGB:
m_RedChannelPrecision = bitDepth; m_RedChannelPrecision = bitDepth;
m_RedData = new unsigned char[numPixels]; m_RedData = new unsigned char[numPixels];
m_GreenChannelPrecision = bitDepth; m_GreenChannelPrecision = bitDepth;
@ -176,22 +174,22 @@ bool ImageLoaderPNG::ReadData() {
for(uint32 i = 0; i < m_Height; i++) { for(uint32 i = 0; i < m_Height; i++) {
png_read_row(png_ptr, rowData, NULL); png_read_row(png_ptr, rowData, NULL);
unsigned int rowOffset = i * m_Width; unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0; unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) { for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++]; m_RedData[rowOffset + j] = rowData[byteIdx++];
m_GreenData[rowOffset + j] = rowData[byteIdx++]; m_GreenData[rowOffset + j] = rowData[byteIdx++];
m_BlueData[rowOffset + j] = rowData[byteIdx++]; m_BlueData[rowOffset + j] = rowData[byteIdx++];
} }
assert(byteIdx == bpr); assert(byteIdx == bpr);
} }
break; break;
case PNG_COLOR_TYPE_RGB_ALPHA: case PNG_COLOR_TYPE_RGB_ALPHA:
m_RedChannelPrecision = bitDepth; m_RedChannelPrecision = bitDepth;
m_RedData = new unsigned char[numPixels]; m_RedData = new unsigned char[numPixels];
m_GreenChannelPrecision = bitDepth; m_GreenChannelPrecision = bitDepth;
@ -203,23 +201,23 @@ bool ImageLoaderPNG::ReadData() {
for(uint32 i = 0; i < m_Height; i++) { for(uint32 i = 0; i < m_Height; i++) {
png_read_row(png_ptr, rowData, NULL); png_read_row(png_ptr, rowData, NULL);
unsigned int rowOffset = i * m_Width; unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0; unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) { for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++]; m_RedData[rowOffset + j] = rowData[byteIdx++];
m_GreenData[rowOffset + j] = rowData[byteIdx++]; m_GreenData[rowOffset + j] = rowData[byteIdx++];
m_BlueData[rowOffset + j] = rowData[byteIdx++]; m_BlueData[rowOffset + j] = rowData[byteIdx++];
m_AlphaData[rowOffset + j] = rowData[byteIdx++]; m_AlphaData[rowOffset + j] = rowData[byteIdx++];
} }
assert(byteIdx == bpr); assert(byteIdx == bpr);
} }
break; break;
case PNG_COLOR_TYPE_GRAY_ALPHA: case PNG_COLOR_TYPE_GRAY_ALPHA:
m_RedChannelPrecision = bitDepth; m_RedChannelPrecision = bitDepth;
m_RedData = new unsigned char[numPixels]; m_RedData = new unsigned char[numPixels];
m_AlphaChannelPrecision = bitDepth; m_AlphaChannelPrecision = bitDepth;
@ -227,17 +225,17 @@ bool ImageLoaderPNG::ReadData() {
for(uint32 i = 0; i < m_Height; i++) { for(uint32 i = 0; i < m_Height; i++) {
png_read_row(png_ptr, rowData, NULL); png_read_row(png_ptr, rowData, NULL);
unsigned int rowOffset = i * m_Width; unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0; unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) { for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++]; m_RedData[rowOffset + j] = rowData[byteIdx++];
m_AlphaData[rowOffset + j] = rowData[byteIdx++]; m_AlphaData[rowOffset + j] = rowData[byteIdx++];
} }
assert(byteIdx == bpr); assert(byteIdx == bpr);
} }
break; break;
} }

View file

@ -66,87 +66,87 @@ public:
ImageWriterPNG &writer = *(ImageWriterPNG *)(io_ptr); ImageWriterPNG &writer = *(ImageWriterPNG *)(io_ptr);
while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) { while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) {
uint8 *newData = new uint8[writer.m_RawFileDataSz << 1]; uint8 *newData = new uint8[writer.m_RawFileDataSz << 1];
memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz); memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz);
writer.m_RawFileDataSz <<= 1; writer.m_RawFileDataSz <<= 1;
delete writer.m_RawFileData; delete writer.m_RawFileData;
writer.m_RawFileData = newData; writer.m_RawFileData = newData;
} }
unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]); unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]);
memcpy(stream, outBytes, byteCountToWrite); memcpy(stream, outBytes, byteCountToWrite);
writer.m_StreamPosition += byteCountToWrite; writer.m_StreamPosition += byteCountToWrite;
} }
// Flush callback handed to png_set_write_fn. WriteDataToStream copies the
// encoded bytes straight into the writer's in-memory buffer, so there is
// nothing to flush here.
static void FlushStream(png_structp png_ptr) {
  (void)png_ptr;  // Intentionally unused.
}
}; };
// Builds a PNG writer for `im`: the image's width, height and raw data are
// forwarded to the ImageWriter base, and the output stream position starts
// at zero.
ImageWriterPNG::ImageWriterPNG(const Image &im)
  : ImageWriter(im.GetWidth(), im.GetHeight(), im.RawData()),
    m_StreamPosition(0) {
}
bool ImageWriterPNG::WriteImage() { bool ImageWriterPNG::WriteImage() {
png_structp png_ptr = NULL; png_structp png_ptr = NULL;
png_infop info_ptr = NULL; png_infop info_ptr = NULL;
png_byte ** row_pointers = NULL; png_byte ** row_pointers = NULL;
int pixel_size = 4; int pixel_size = 4;
int depth = 8; int depth = 8;
png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (png_ptr == NULL) { if (png_ptr == NULL) {
return false; return false;
} }
info_ptr = png_create_info_struct (png_ptr); info_ptr = png_create_info_struct (png_ptr);
if (info_ptr == NULL) { if (info_ptr == NULL) {
png_destroy_write_struct (&png_ptr, &info_ptr); png_destroy_write_struct (&png_ptr, &info_ptr);
return false; return false;
} }
/* Set image attributes. */ /* Set image attributes. */
png_set_IHDR (png_ptr, png_set_IHDR (png_ptr,
info_ptr, info_ptr,
m_Width, m_Width,
m_Height, m_Height,
depth, depth,
PNG_COLOR_TYPE_RGBA, PNG_COLOR_TYPE_RGBA,
PNG_INTERLACE_NONE, PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_DEFAULT, PNG_COMPRESSION_TYPE_DEFAULT,
PNG_FILTER_TYPE_DEFAULT); PNG_FILTER_TYPE_DEFAULT);
/* Initialize rows of PNG. */ /* Initialize rows of PNG. */
row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *)); row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *));
for (uint32 y = 0; y < m_Height; ++y) { for (uint32 y = 0; y < m_Height; ++y) {
png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size); png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size);
row_pointers[y] = row; row_pointers[y] = row;
for (uint32 x = 0; x < m_Width; ++x) { for (uint32 x = 0; x < m_Width; ++x) {
for(uint32 ch = 0; ch < 4; ch++) { for(uint32 ch = 0; ch < 4; ch++) {
*row++ = GetChannelForPixel(x, y, ch); *row++ = GetChannelForPixel(x, y, ch);
} }
} }
} }
png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream); png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream);
png_set_rows (png_ptr, info_ptr, row_pointers); png_set_rows (png_ptr, info_ptr, row_pointers);
png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL); png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
for (uint32 y = 0; y < m_Height; y++) { for (uint32 y = 0; y < m_Height; y++) {
png_free (png_ptr, row_pointers[y]); png_free (png_ptr, row_pointers[y]);
} }
png_free (png_ptr, row_pointers); png_free (png_ptr, row_pointers);
png_destroy_write_struct (&png_ptr, &info_ptr); png_destroy_write_struct (&png_ptr, &info_ptr);
m_RawFileDataSz = m_StreamPosition; m_RawFileDataSz = m_StreamPosition;
return true; return true;
} }

View file

@ -55,8 +55,8 @@ class ImageWriterPNG : public ImageWriter {
virtual bool WriteImage(); virtual bool WriteImage();
private: private:
uint32 m_StreamPosition; uint32 m_StreamPosition;
friend class PNGStreamWriter; friend class PNGStreamWriter;
}; };
#endif // _IMAGE_LOADER_H_ #endif // _IMAGE_LOADER_H_