Get rid of evil tabs once and forever (from cpp/h files)

This commit is contained in:
Pavel Krajcevski 2013-08-26 16:54:08 -04:00
parent af2318027b
commit 03a7934644
24 changed files with 3303 additions and 3259 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -77,7 +77,7 @@ class BitStream {
{ }
int GetBitsWritten() const { return m_BitsWritten; }
~BitStream() { }
void WriteBitsR(unsigned int val, unsigned int nBits) {
for(unsigned int i = 0; i < nBits; i++) {

View file

@ -89,22 +89,22 @@ static T max(const T &a, const T &b) {
static const double kPi = 3.141592653589793238462643383279502884197;
// Lookup table mapping a byte value i to float(i): entry i holds i as a
// float, for i in [0, 255].
static const float kFloatConversion[256] = {
  0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
  16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
  32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f,
  48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f,
  64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f,
  80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f,
  96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f,
  112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f,
  128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f,
  144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f,
  160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f,
  176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f,
  192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f,
  208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f,
  224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f,
  240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f
};
///////////////////////////////////////////////////////////////////////////////
@ -115,41 +115,41 @@ static const float kFloatConversion[256] = {
// Returns the number of set bits in the 8-bit mask n (0 when n == 0).
//
// Each pass of the loop clears the lowest set bit (n &= n - 1), so the loop
// body runs once per set bit.
//
// This single portable implementation replaces the former per-platform
// inline-assembly variants. Those computed 8 minus the index of the lowest
// set bit, which equals the bit count only when the set bits are the
// contiguous high bits of the byte, and the MSVC variant had no return
// statement.
static inline uint32 CountBitsInMask(uint8 n) {
  uint32 c = 0;
  while(n) {
    n &= n - 1;
    c++;
  }
  return c;
}
// Clamps x in place to the range [min, max]: a value below min becomes min,
// otherwise a value above max becomes max, otherwise x is left unchanged.
template <typename ty>
static inline void clamp(ty &x, const ty &min, const ty &max) {
  x = (x < min)? min : ((x > max)? max : x);
}
// absolute distance. It turns out the compiler does a much
@ -157,23 +157,23 @@ static inline void clamp(ty &x, const ty &min, const ty &max) {
// translate the values to/from registers
// Returns the absolute difference |a - b| of two byte values.
// The subtraction is always larger-minus-smaller, so the result fits in a
// byte. The disabled inline-assembly and commented-out bit-trick variants
// were dead code and have been removed.
static uint8 sad(uint8 a, uint8 b) {
  return (a > b)? a - b : b - a;
}
@ -186,55 +186,55 @@ done:
// Quantizes the 8-bit channel value val to the precision described by mask
// and returns whichever of two neighbouring quantized values is closer to
// val (the upper one on a tie).
//
//   val   Channel value to quantize.
//   mask  Bits kept by the quantizer. 0xFF returns val unchanged; 0x00
//         returns 0xFF. For any other mask the assert requires the cleared
//         bits to be exactly the low (8 - prec) bits.
//   pBit  If >= 0, adds one bit of precision directly below the mask; that
//         bit is set when pBit is non-zero and clear when it is zero.
//         A negative pBit adds nothing.
//
// Both candidates are expanded back to 8 bits by OR-ing in a copy of
// themselves shifted right by the precision. They are held in uint32 and may
// carry bits above bit 7; sad() and the return type truncate them to a byte.
uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit) {

  // If the mask is all the bits, then we can just return the value.
  if(mask == 0xFF) {
    return val;
  }

  // Otherwise if the mask is no bits then we'll assume that they want
  // all the bits ... this is only really relevant for alpha...
  if(mask == 0x0) {
    return 0xFF;
  }

  uint32 prec = CountBitsInMask(mask);
  const uint32 step = 1 << (8 - prec);

  assert(step-1 == uint8(~mask));

  // Candidate pair: the masked value and the next value one step up.
  uint32 lval = val & mask;
  uint32 hval = lval + step;

  if(pBit >= 0) {
    prec++;
    lval |= !!(pBit) << (8 - prec);
    hval |= !!(pBit) << (8 - prec);
  }

  // Setting the p-bit may have pushed the lower candidate above val; move
  // the pair down one step so that the candidates bracket val again.
  if(lval > val) {
    lval -= step;
    hval -= step;
  }

  lval |= lval >> prec;
  hval |= hval >> prec;

  if(sad(val, lval) < sad(val, hval))
    return lval;
  else
    return hval;
}
// Quantizes this point to a packed 32-bit pixel.
//
// Each component is rounded by adding 0.5 and converting to uint32, reduced
// to its low 8 bits, and passed through QuantizeChannel with the matching
// byte of channelMask (r: bits 0-7, g: bits 8-15, b: bits 16-23,
// a: bits 24-31) and the shared pBit. The results are packed in that same
// layout.
uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
  const uint8 pRet0 = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit);
  const uint8 pRet1 = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit);
  const uint8 pRet2 = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit);
  const uint8 pRet3 = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit);

  // Widen before shifting: a uint8 operand would be promoted to signed int,
  // and shifting a value >= 0x80 left by 24 would reach the sign bit.
  const uint32 ret = uint32(pRet0) | (uint32(pRet1) << 8) | (uint32(pRet2) << 16) | (uint32(pRet3) << 24);

  return ret;
}
///////////////////////////////////////////////////////////////////////////////
@ -244,85 +244,85 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
///////////////////////////////////////////////////////////////////////////////
// Right-multiplies this matrix by mat in place (this = this * mat) and
// returns a reference to this matrix.
RGBAMatrix &RGBAMatrix::operator *=(const RGBAMatrix &mat) {
  *this = ((*this) * mat);
  return (*this);
}
// Returns the 4x4 matrix product (this * mat). Element (i, j) of the result
// is the sum over k of m[i*4 + k] * mat.m[k*4 + j].
RGBAMatrix RGBAMatrix::operator *(const RGBAMatrix &mat) const {

  RGBAMatrix result;

  for(int i = 0; i < 4; i++) {
    for(int j = 0; j < 4; j++) {

      // result starts out as the identity, so clear each cell first.
      result(i, j) = 0.0f;
      for(int k = 0; k < 4; k++) {
        result(i, j) += m[i*4 + k] * mat.m[k*4 + j];
      }
    }
  }

  return result;
}
// Returns the matrix-vector product (this * p): component i of the result is
// the dot product of the i-th group of four matrix elements with
// (p.x, p.y, p.z, p.w).
RGBAVector RGBAMatrix::operator *(const RGBAVector &p) const {
  return RGBAVector (
    p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4,
    p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8,
    p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12,
    p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16
  );
}
// Builds a matrix from the default-constructed (identity) matrix with
// m6 = m11 = cos(rad), m10 = sin(rad) and m7 = -sin(rad); all other
// elements keep their identity values.
RGBAMatrix RGBAMatrix::RotateX(float rad) {
  RGBAMatrix result;
  result.m6 = result.m11 = cos(rad);
  result.m10 = sin(rad);
  result.m7 = -result.m10;
  return result;
}
// Builds a matrix from the default-constructed (identity) matrix with
// m1 = m11 = cos(rad), m3 = sin(rad) and m9 = -sin(rad); all other
// elements keep their identity values.
RGBAMatrix RGBAMatrix::RotateY(float rad) {
  RGBAMatrix result;
  result.m1 = result.m11 = cos(rad);
  result.m3 = sin(rad);
  result.m9 = -result.m3;
  return result;
}
// Builds a matrix from the default-constructed (identity) matrix with
// m1 = m6 = cos(rad), m5 = sin(rad) and m2 = -sin(rad); all other
// elements keep their identity values.
RGBAMatrix RGBAMatrix::RotateZ(float rad) {
  RGBAMatrix result;
  result.m1 = result.m6 = cos(rad);
  result.m5 = sin(rad);
  result.m2 = -result.m5;
  return result;
}
// Builds a matrix from the default-constructed (identity) matrix whose last
// element in each group of four (m4, m8, m12, m16) is replaced by
// t.x, t.y, t.z and t.w respectively. Note that m16 becomes t.w rather than
// the identity's 1.
RGBAMatrix RGBAMatrix::Translate(const RGBAVector &t) {
  RGBAMatrix result;
  result.m4 = t.x;
  result.m8 = t.y;
  result.m12 = t.z;
  result.m16 = t.w;
  return result;
}
// Returns true when this matrix equals the identity to within a tolerance of
// 1e-5 per element: diagonal elements are compared against 1, all others
// against 0. Returns false at the first element outside the tolerance.
bool RGBAMatrix::Identity() {

  for(int i = 0; i < 4; i++) {
    for(int j = 0; j < 4; j++) {

      if(i == j) {
        if(fabs(m[i*4 + j] - 1.0f) > 1e-5)
          return false;
      }
      else {
        if(fabs(m[i*4 + j]) > 1e-5)
          return false;
      }
    }
  }

  return true;
}
///////////////////////////////////////////////////////////////////////////////
@ -332,45 +332,45 @@ bool RGBAMatrix::Identity() {
///////////////////////////////////////////////////////////////////////////////
// Constructs the union of two clusters: starts as a copy of left, then adds
// every data point of right through AddPoint. The cached principal axis
// copied from left is marked invalid, since the point set has changed.
RGBACluster::RGBACluster(const RGBACluster &left, const RGBACluster &right) {

  *this = left;
  for(uint32 i = 0; i < right.m_NumPoints; i++) {
    const RGBAVector &p = right.m_DataPoints[i];
    AddPoint(p);
  }

  m_PrincipalAxisCached = false;
}
// Appends p to the cluster and updates the running aggregates: the component
// sum (m_Total), the bit string of member indices (bit p.GetIdx() is set),
// and the per-channel minimum and maximum.
//
// The assert requires room for one more point (fewer than kMaxNumDataPoints
// stored so far).
// NOTE(review): m_PrincipalAxisCached is not cleared here; confirm callers
// do not add points after the principal axis has been computed.
void RGBACluster::AddPoint(const RGBAVector &p) {
  assert(m_NumPoints < kMaxNumDataPoints);
  m_Total += p;
  m_DataPoints[m_NumPoints++] = p;
  m_PointBitString |= 1 << p.GetIdx();

  for(uint32 i = 0; i < kNumColorChannels; i++) {
    m_Min.c[i] = min(p.c[i], m_Min.c[i]);
    m_Max.c[i] = max(p.c[i], m_Max.c[i]);
  }
}
// Writes the principal axis of the cluster's points to axis.
//
// The axis is computed on first use by the free function
// ::GetPrincipalAxis, which also fills m_PrincipalEigenvalue and
// m_SecondEigenvalue and whose return value is stored in
// m_PowerMethodIterations. Later calls return the cached axis.
void RGBACluster::GetPrincipalAxis(RGBADir &axis) {

  if(!m_PrincipalAxisCached) {
    m_PowerMethodIterations = ::GetPrincipalAxis(
      m_NumPoints,
      m_DataPoints,
      m_PrincipalAxis,
      m_PrincipalEigenvalue,
      &m_SecondEigenvalue
    );

    m_PrincipalAxisCached = true;
  }

  axis = m_PrincipalAxis;
}
double RGBACluster::GetPrincipalEigenvalue() {
@ -408,74 +408,74 @@ uint32 RGBACluster::GetPowerMethodIterations() {
double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2], int *indices) const {
// nBuckets should be a power of two.
assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
// nBuckets should be a power of two.
assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1));
typedef uint32 tInterpPair[2];
typedef tInterpPair tInterpLevel[16];
const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1);
const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1));
typedef uint32 tInterpPair[2];
typedef tInterpPair tInterpLevel[16];
const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1);
assert(indexPrec >= 2 && indexPrec <= 4);
assert(indexPrec >= 2 && indexPrec <= 4);
uint32 qp1, qp2;
if(pbits) {
qp1 = p1.ToPixel(bitMask, pbits[0]);
qp2 = p2.ToPixel(bitMask, pbits[1]);
}
else {
qp1 = p1.ToPixel(bitMask);
qp2 = p2.ToPixel(bitMask);
}
uint32 qp1, qp2;
if(pbits) {
qp1 = p1.ToPixel(bitMask, pbits[0]);
qp2 = p2.ToPixel(bitMask, pbits[1]);
}
else {
qp1 = p1.ToPixel(bitMask);
qp2 = p2.ToPixel(bitMask);
}
uint8 *pqp1 = (uint8 *)&qp1;
uint8 *pqp2 = (uint8 *)&qp2;
uint8 *pqp1 = (uint8 *)&qp1;
uint8 *pqp2 = (uint8 *)&qp2;
const RGBAVector metric = errorMetricVec;
const RGBAVector metric = errorMetricVec;
float totalError = 0.0;
for(uint32 i = 0; i < m_NumPoints; i++) {
float totalError = 0.0;
for(uint32 i = 0; i < m_NumPoints; i++) {
const uint32 pixel = m_DataPoints[i].ToPixel();
const uint8 *pb = (const uint8 *)(&pixel);
const uint32 pixel = m_DataPoints[i].ToPixel();
const uint8 *pb = (const uint8 *)(&pixel);
float minError = FLT_MAX;
int bestBucket = -1;
for(int j = 0; j < nBuckets; j++) {
float minError = FLT_MAX;
int bestBucket = -1;
for(int j = 0; j < nBuckets; j++) {
uint32 interp0 = (*interpVals)[j][0];
uint32 interp1 = (*interpVals)[j][1];
uint32 interp0 = (*interpVals)[j][0];
uint32 interp1 = (*interpVals)[j][1];
RGBAVector errorVec (0.0f);
for(uint32 k = 0; k < kNumColorChannels; k++) {
const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF;
const uint8 dist = sad(pb[k], ip);
errorVec.c[k] = kFloatConversion[dist] * metric.c[k];
}
float error = errorVec * errorVec;
if(error < minError) {
minError = error;
bestBucket = j;
}
RGBAVector errorVec (0.0f);
for(uint32 k = 0; k < kNumColorChannels; k++) {
const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF;
const uint8 dist = sad(pb[k], ip);
errorVec.c[k] = kFloatConversion[dist] * metric.c[k];
}
float error = errorVec * errorVec;
if(error < minError) {
minError = error;
bestBucket = j;
}
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
// farther away from the reference point along the line). Hence we can early out here.
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
// about 0.01 RMS error.
else if(error > minError) {
break;
}
}
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
// farther away from the reference point along the line). Hence we can early out here.
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
// about 0.01 RMS error.
else if(error > minError) {
break;
}
}
totalError += minError;
totalError += minError;
assert(bestBucket >= 0);
if(indices) indices[i] = bestBucket;
}
assert(bestBucket >= 0);
if(indices) indices[i] = bestBucket;
}
return totalError;
return totalError;
}
///////////////////////////////////////////////////////////////////////////////
@ -485,175 +485,174 @@ double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, u
///////////////////////////////////////////////////////////////////////////////
// Clamps every component (r, g, b, a) of both endpoints in place to the
// range [0, 255].
void ClampEndpoints(RGBAVector &p1, RGBAVector &p2) {
  clamp(p1.r, 0.0f, 255.0f);
  clamp(p1.g, 0.0f, 255.0f);
  clamp(p1.b, 0.0f, 255.0f);
  clamp(p1.a, 0.0f, 255.0f);

  clamp(p2.r, 0.0f, 255.0f);
  clamp(p2.g, 0.0f, 255.0f);
  clamp(p2.b, 0.0f, 255.0f);
  clamp(p2.a, 0.0f, 255.0f);
}
// Estimates the dominant eigenvector and eigenvalue of mat by power
// iteration.
//
//   mat     Matrix to analyse.
//   eigVec  Receives the last iterate (unit length after each step).
//   eigVal  Receives the length of mat * b from the last step, or 0.0 when
//           the product vanished or no attempt converged.
//
// Returns the number of iterations used by the last attempt. Up to three
// attempts are made, each limited to kMaxNumIterations steps; an attempt
// has converged when the dot product of successive iterates is within 1e-5
// of 1.
static uint32 PowerIteration(const RGBAMatrix &mat, RGBADir &eigVec, double &eigVal) {

  int numIterations = 0;
  const int kMaxNumIterations = 200;

  for(int nTries = 0; nTries < 3; nTries++) {
    // !SPEED! Find eigenvectors by using the power method. This is good because the
    // matrix is only 4x4, which allows us to use SIMD...
    // NOTE(review): all four components of the start vector get the same
    // random value, so after normalization every attempt appears to start
    // from the same direction -- confirm whether the retries are meant to
    // differ.
    RGBAVector b = RGBAVector(float(rand()) + 1.0f);
    b /= b.Length();

    bool fixed = false;
    numIterations = 0;
    while(!fixed && ++numIterations < kMaxNumIterations) {

      RGBAVector newB = mat * b;

      // !HACK! If the principal eigenvector of the covariance matrix
      // converges to zero, that means that the points lie equally
      // spaced on a sphere in this space. In this (extremely rare)
      // situation, just choose a point and use it as the principal
      // direction.
      const float newBlen = newB.Length();
      if(newBlen < 1e-10) {
        eigVec = b;
        eigVal = 0.0;
        return numIterations;
      }

      // Reuse the length computed above instead of calling Length() again.
      eigVal = newBlen;
      newB /= newBlen;

      if(fabs(1.0f - (b * newB)) < 1e-5)
        fixed = true;

      b = newB;
    }

    eigVec = b;
    if(numIterations < kMaxNumIterations) {
      break;
    }
  }

  if(numIterations == kMaxNumIterations) {
    eigVal = 0.0;
  }

  return numIterations;
}
// Computes the principal axis of a set of points.
//
//   nPts    Number of points; at most kMaxNumDataPoints (asserted).
//   pts     The points.
//   axis    Receives the axis: zero when all points coincide, the direction
//           between the first two distinct points when all distinct points
//           are collinear, otherwise the power-iteration result on the
//           covariance matrix.
//   eigOne  Receives the eigenvalue reported by PowerIteration.
//   eigTwo  Optional; when non-NULL receives a second eigenvalue obtained by
//           subtracting eigOne * axis * axis^T from the covariance matrix
//           and iterating again (0.0 when eigOne is 0 or the reduced
//           matrix is numerically zero).
//
// Returns the total number of power iterations performed (0 on the two
// early-out paths).
// NOTE(review): the two early-out paths return without writing eigOne or
// *eigTwo -- confirm callers do not read them in that case.
uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, double &eigOne, double *eigTwo) {

  assert(nPts <= kMaxNumDataPoints);

  RGBAVector avg (0.0f);
  for(uint32 i = 0; i < nPts; i++) {
    avg += pts[i];
  }
  avg /= float(nPts);

  // We use these vectors for calculating the covariance matrix...
  RGBAVector toPts[kMaxNumDataPoints];
  for(uint32 i = 0; i < nPts; i++) {
    toPts[i] = pts[i] - avg;
  }

  // Generate a list of unique points...
  RGBAVector upts[kMaxNumDataPoints];
  uint32 uptsIdx = 0;
  for(uint32 i = 0; i < nPts; i++) {

    bool hasPt = false;
    for(uint32 j = 0; j < uptsIdx; j++) {
      if(upts[j] == pts[i]) {
        hasPt = true;
        break;
      }
    }

    if(!hasPt) {
      upts[uptsIdx++] = pts[i];
    }
  }

  assert(uptsIdx > 0);

  if(uptsIdx == 1) {
    axis.r = axis.g = axis.b = axis.a = 0.0f;
    return 0;

  // Collinear?
  } else {

    RGBADir dir (upts[1] - upts[0]);
    bool collinear = true;
    // Only the first uptsIdx entries of upts hold real points; the rest are
    // default-constructed, so the scan must stop at uptsIdx, not nPts.
    for(uint32 i = 2; i < uptsIdx; i++) {
      RGBAVector v = (upts[i] - upts[0]);
      if(fabs(fabs(v*dir) - v.Length()) > 1e-7) {
        collinear = false;
        break;
      }
    }

    if(collinear) {
      axis = dir;
      return 0;
    }
  }

  RGBAMatrix covMatrix;

  // Compute covariance.
  for(uint32 i = 0; i < kNumColorChannels; i++) {
    for(uint32 j = 0; j <= i; j++) {

      float sum = 0.0;
      for(uint32 k = 0; k < nPts; k++) {
        sum += toPts[k].c[i] * toPts[k].c[j];
      }

      // NOTE(review): the divisor is float(kNumColorChannels - 1), which
      // does not depend on nPts -- confirm this scaling is intended.
      covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1];
      covMatrix(j, i) = covMatrix(i, j);
    }
  }

  uint32 iters = PowerIteration(covMatrix, axis, eigOne);

  if(NULL != eigTwo) {
    if(eigOne != 0.0) {
      RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix(
        axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3],
        axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3],
        axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3],
        axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3]
      );

      bool allZero = true;
      for(uint32 i = 0; i < 16; i++) {
        if(fabs(reduced[i]) > 0.0005) {
          allZero = false;
        }
      }

      if(allZero) {
        *eigTwo = 0.0;
      }
      else {
        RGBADir dummyDir;
        iters += PowerIteration(reduced, dummyDir, *eigTwo);
      }
    }
    else {
      *eigTwo = 0.0;
    }
  }

  return iters;
}

View file

@ -78,260 +78,260 @@ static const uint32 kMaxNumDataPoints = 16;
class RGBAVector {
public:
union {
struct { float r, g, b, a; };
struct { float x, y, z, w; };
float c[4];
};
union {
struct { float r, g, b, a; };
struct { float x, y, z, w; };
float c[4];
};
uint32 GetIdx() const { return idx; }
uint32 GetIdx() const { return idx; }
RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
RGBAVector(uint32 _idx, uint32 pixel) :
r(float(pixel & 0xFF)),
g(float((pixel >> 8) & 0xFF)),
b(float((pixel >> 16) & 0xFF)),
a(float((pixel >> 24) & 0xFF)),
idx(_idx)
{ }
RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
RGBAVector(uint32 _idx, uint32 pixel) :
r(float(pixel & 0xFF)),
g(float((pixel >> 8) & 0xFF)),
b(float((pixel >> 16) & 0xFF)),
a(float((pixel >> 24) & 0xFF)),
idx(_idx)
{ }
RGBAVector(float _r, float _g, float _b, float _a) :
r(_r), g(_g), b(_b), a(_a), idx(0) { }
RGBAVector(float _r, float _g, float _b, float _a) :
r(_r), g(_g), b(_b), a(_a), idx(0) { }
explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { }
explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { }
RGBAVector &operator =(const RGBAVector &other) {
this->idx = other.idx;
memcpy(c, other.c, sizeof(c));
return (*this);
}
RGBAVector &operator =(const RGBAVector &other) {
this->idx = other.idx;
memcpy(c, other.c, sizeof(c));
return (*this);
}
RGBAVector operator +(const RGBAVector &p) const {
return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a);
}
RGBAVector operator +(const RGBAVector &p) const {
return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a);
}
RGBAVector &operator +=(const RGBAVector &p) {
r += p.r; g += p.g; b += p.b; a += p.a;
return *this;
}
RGBAVector &operator +=(const RGBAVector &p) {
r += p.r; g += p.g; b += p.b; a += p.a;
return *this;
}
RGBAVector operator -(const RGBAVector &p) const {
return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a);
}
RGBAVector operator -(const RGBAVector &p) const {
return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a);
}
RGBAVector &operator -=(const RGBAVector &p) {
r -= p.r; g -= p.g; b -= p.b; a -= p.a;
return *this;
}
RGBAVector &operator -=(const RGBAVector &p) {
r -= p.r; g -= p.g; b -= p.b; a -= p.a;
return *this;
}
RGBAVector operator /(const float s) const {
return RGBAVector(r / s, g / s, b / s, a / s);
}
RGBAVector operator /(const float s) const {
return RGBAVector(r / s, g / s, b / s, a / s);
}
RGBAVector &operator /=(const float s) {
r /= s; g /= s; b /= s; a /= s;
return *this;
}
RGBAVector &operator /=(const float s) {
r /= s; g /= s; b /= s; a /= s;
return *this;
}
float operator *(const RGBAVector &p) const {
return r * p.r + g * p.g + b * p.b + a * p.a;
}
float operator *(const RGBAVector &p) const {
return r * p.r + g * p.g + b * p.b + a * p.a;
}
float Length() const {
return sqrt((*this) * (*this));
}
float Length() const {
return sqrt((*this) * (*this));
}
RGBAVector &operator *=(const RGBAVector &v) {
r *= v.r; g *= v.g; b *= v.b; a *= v.a;
return *this;
}
RGBAVector &operator *=(const RGBAVector &v) {
r *= v.r; g *= v.g; b *= v.b; a *= v.a;
return *this;
}
RGBAVector operator *(const float s) const {
return RGBAVector(r * s, g * s, b * s, a * s);
}
RGBAVector operator *(const float s) const {
return RGBAVector(r * s, g * s, b * s, a * s);
}
friend RGBAVector operator *(const float s, const RGBAVector &p) {
return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s);
}
friend RGBAVector operator *(const float s, const RGBAVector &p) {
return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s);
}
RGBAVector &operator *=(const float s) {
r *= s; g *= s; b *= s; a *= s;
return *this;
}
RGBAVector &operator *=(const float s) {
r *= s; g *= s; b *= s; a *= s;
return *this;
}
float &operator [](const int i) {
return c[i];
}
float &operator [](const int i) {
return c[i];
}
friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) {
const RGBAVector d = rhs - lhs;
return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7;
}
friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) {
const RGBAVector d = rhs - lhs;
return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7;
}
friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) {
return !(rhs == lhs);
}
friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) {
return !(rhs == lhs);
}
operator float *() {
return c;
}
operator float *() {
return c;
}
RGBAVector Cross(const RGBAVector &rhs) {
return RGBAVector(
rhs.y * z - y * rhs.z,
rhs.z * x - z * rhs.x,
rhs.x * y - x * rhs.y,
1.0f
);
}
RGBAVector Cross(const RGBAVector &rhs) {
return RGBAVector(
rhs.y * z - y * rhs.z,
rhs.z * x - z * rhs.x,
rhs.x * y - x * rhs.y,
1.0f
);
}
// Quantize this point.
uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const;
// Quantize this point.
uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const;
private:
uint32 idx;
uint32 idx;
};
class RGBAMatrix {
private:
union {
float m[kNumColorChannels*kNumColorChannels];
struct {
float m1, m2, m3, m4;
float m5, m6, m7, m8;
float m9, m10, m11, m12;
float m13, m14, m15, m16;
};
};
union {
float m[kNumColorChannels*kNumColorChannels];
struct {
float m1, m2, m3, m4;
float m5, m6, m7, m8;
float m9, m10, m11, m12;
float m13, m14, m15, m16;
};
};
RGBAMatrix(const float *arr) {
memcpy(m, arr, sizeof(m));
}
RGBAMatrix(const float *arr) {
memcpy(m, arr, sizeof(m));
}
public:
RGBAMatrix(
float _m1, float _m2, float _m3, float _m4,
float _m5, float _m6, float _m7, float _m8,
float _m9, float _m10, float _m11, float _m12,
float _m13, float _m14, float _m15, float _m16
) :
m1(_m1), m2(_m2), m3(_m3), m4(_m4),
m5(_m5), m6(_m6), m7(_m7), m8(_m8),
m9(_m9), m10(_m10), m11(_m11), m12(_m12),
m13(_m13), m14(_m14), m15(_m15), m16(_m16)
{ }
RGBAMatrix() :
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
{ }
RGBAMatrix(
float _m1, float _m2, float _m3, float _m4,
float _m5, float _m6, float _m7, float _m8,
float _m9, float _m10, float _m11, float _m12,
float _m13, float _m14, float _m15, float _m16
) :
m1(_m1), m2(_m2), m3(_m3), m4(_m4),
m5(_m5), m6(_m6), m7(_m7), m8(_m8),
m9(_m9), m10(_m10), m11(_m11), m12(_m12),
m13(_m13), m14(_m14), m15(_m15), m16(_m16)
{ }
RGBAMatrix() :
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
{ }
RGBAMatrix &operator =(const RGBAMatrix &other) {
memcpy(m, other.m, sizeof(m));
return (*this);
}
RGBAMatrix &operator =(const RGBAMatrix &other) {
memcpy(m, other.m, sizeof(m));
return (*this);
}
RGBAMatrix operator +(const RGBAMatrix &p) const {
float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i];
return RGBAMatrix(newm);
}
RGBAMatrix operator +(const RGBAMatrix &p) const {
float newm[kNumColorChannels*kNumColorChannels];
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i];
return RGBAMatrix(newm);
}
RGBAMatrix &operator +=(const RGBAMatrix &p) {
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i];
return *this;
}
RGBAMatrix &operator +=(const RGBAMatrix &p) {
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i];
return *this;
}
// Returns a new matrix holding the element-wise difference (this - p).
RGBAMatrix operator -(const RGBAMatrix &p) const {
  float newm[kNumColorChannels*kNumColorChannels];
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    newm[i] = m[i] - p.m[i];
  }
  return RGBAMatrix(newm);
}
// Subtracts p from this matrix element by element, in place, and returns *this.
RGBAMatrix &operator -=(const RGBAMatrix &p) {
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    m[i] -= p.m[i];
  }
  return *this;
}
// Returns a new matrix with every entry divided by the scalar s.
// No check is made for s == 0.
RGBAMatrix operator /(const float s) const {
  float newm[kNumColorChannels*kNumColorChannels];
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    newm[i] = m[i] / s;
  }
  return RGBAMatrix(newm);
}
// Divides every entry by the scalar s in place and returns *this.
// No check is made for s == 0.
RGBAMatrix &operator /=(const float s) {
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    m[i] /= s;
  }
  return *this;
}
// Returns a new matrix with every entry multiplied by the float scalar s.
RGBAMatrix operator *(const float s) const {
  float newm[kNumColorChannels*kNumColorChannels];
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    newm[i] = m[i] * s;
  }
  return RGBAMatrix(newm);
}
// Returns a new matrix with every entry multiplied by the double scalar s.
// Each product is formed in double precision and then narrowed to float.
RGBAMatrix operator *(const double s) const {
  float newm[kNumColorChannels*kNumColorChannels];
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    newm[i] = float(double(m[i]) * s);
  }
  return RGBAMatrix(newm);
}
// Scalar-on-the-left form of matrix scaling: returns p with every entry
// multiplied by the float scalar s.
friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) {
  float newm[kNumColorChannels*kNumColorChannels];
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    newm[i] = p.m[i] * s;
  }
  return RGBAMatrix(newm);
}
// Scalar-on-the-left form of matrix scaling with a double scalar: each
// product is formed in double precision and then narrowed to float.
friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) {
  float newm[kNumColorChannels*kNumColorChannels];
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    newm[i] = float(double(p.m[i]) * s);
  }
  return RGBAMatrix(newm);
}
// Multiplies every entry by the float scalar s in place and returns *this.
RGBAMatrix &operator *=(const float s) {
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    m[i] *= s;
  }
  return *this;
}
// Two-index access: returns a mutable reference to flat entry i*4 + j.
// No bounds checking is performed.
float &operator ()(const int i, const int j) {
  return (*this)[i*4 + j];
}
// Flat access: returns a mutable reference to entry i of the backing array m.
// No bounds checking is performed.
float &operator [](const int i) {
  return m[i];
}
// Approximate equality: true when every pair of corresponding entries
// differs by at most 1e-10 in magnitude.
friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) {
  const RGBAMatrix d = rhs - lhs;
  for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) {
    // Test both signs of the difference: checking only d.m[i] > 1e-10 would
    // report a matrix with smaller entries as equal.
    if(d.m[i] > 1e-10 || d.m[i] < -1e-10)
      return false;
  }
  return true;
}
// Implicit conversion to a pointer to the first entry of the backing array m.
operator float *() {
  return m;
}
// The members below are only declared here; their definitions are not part
// of this class body.

// Matrix * vector and matrix * matrix products.
RGBAVector operator *(const RGBAVector &p) const;
RGBAMatrix operator *(const RGBAMatrix &mat) const;
RGBAMatrix &operator *=(const RGBAMatrix &mat);

// Factory functions.
// NOTE(review): presumably rotations about the named axis by `rad` radians
// and a translation by t — confirm against the definitions.
static RGBAMatrix RotateX(float rad);
static RGBAMatrix RotateY(float rad);
static RGBAMatrix RotateZ(float rad);
static RGBAMatrix Translate(const RGBAVector &t);

// NOTE(review): cannot tell from the declaration whether this tests for or
// resets to the identity matrix — confirm against the definition.
bool Identity();
};
// An RGBAVector that is divided by its own Length() when constructed from
// another vector.
class RGBADir : public RGBAVector {
public:
  // Leaves the vector as the RGBAVector default constructor initializes it;
  // no normalization is applied.
  RGBADir() : RGBAVector() { }

  // Copies p and scales the copy by 1 / Length().
  // NOTE(review): a zero-length p divides by zero here — presumably callers
  // never pass one; confirm.
  RGBADir(const RGBAVector &p) : RGBAVector(p) {
    *this /= Length();
  }
};
// Makes sure that the values of the endpoints lie between 0 and 1.
@ -340,83 +340,83 @@ extern void ClampEndpoints(RGBAVector &p1, RGBAVector &p2);
class RGBACluster {
public:
RGBACluster() :
m_NumPoints(0), m_Total(0),
m_PointBitString(0),
m_Min(FLT_MAX),
m_Max(-FLT_MAX),
m_PrincipalAxisCached(false)
{ }
RGBACluster() :
m_NumPoints(0), m_Total(0),
m_PointBitString(0),
m_Min(FLT_MAX),
m_Max(-FLT_MAX),
m_PrincipalAxisCached(false)
{ }
RGBACluster(const RGBACluster &c) :
m_NumPoints(c.m_NumPoints),
m_Total(c.m_Total),
m_PointBitString(c.m_PointBitString),
m_Min(c.m_Min),
m_Max(c.m_Max),
m_PrincipalAxisCached(c.m_PrincipalAxisCached),
m_PrincipalEigenvalue(c.m_PrincipalEigenvalue),
m_SecondEigenvalue(c.m_SecondEigenvalue),
m_PowerMethodIterations(c.m_PowerMethodIterations),
m_PrincipalAxis(c.m_PrincipalAxis)
{
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector));
}
RGBACluster(const RGBACluster &c) :
m_NumPoints(c.m_NumPoints),
m_Total(c.m_Total),
m_PointBitString(c.m_PointBitString),
m_Min(c.m_Min),
m_Max(c.m_Max),
m_PrincipalAxisCached(c.m_PrincipalAxisCached),
m_PrincipalEigenvalue(c.m_PrincipalEigenvalue),
m_SecondEigenvalue(c.m_SecondEigenvalue),
m_PowerMethodIterations(c.m_PowerMethodIterations),
m_PrincipalAxis(c.m_PrincipalAxis)
{
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector));
}
RGBACluster(const RGBACluster &left, const RGBACluster &right);
RGBACluster(const RGBAVector &p) :
m_NumPoints(1),
m_Total(p),
m_PointBitString(0),
m_Min(p), m_Max(p),
m_PrincipalAxisCached(false)
{
m_DataPoints[0] = p;
m_PointBitString |= (1 << p.GetIdx());
}
RGBAVector GetTotal() const { return m_Total; }
const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; }
uint32 GetNumPoints() const { return m_NumPoints; }
RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); }
const RGBAVector *GetPoints() const { return m_DataPoints; }
RGBACluster(const RGBACluster &left, const RGBACluster &right);
RGBACluster(const RGBAVector &p) :
m_NumPoints(1),
m_Total(p),
m_PointBitString(0),
m_Min(p), m_Max(p),
m_PrincipalAxisCached(false)
{
m_DataPoints[0] = p;
m_PointBitString |= (1 << p.GetIdx());
}
RGBAVector GetTotal() const { return m_Total; }
const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; }
uint32 GetNumPoints() const { return m_NumPoints; }
RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); }
const RGBAVector *GetPoints() const { return m_DataPoints; }
void AddPoint(const RGBAVector &p);
void AddPoint(const RGBAVector &p);
void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const {
Min = m_Min, Max = m_Max;
}
void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const {
Min = m_Min, Max = m_Max;
}
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const;
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const;
// Returns the principal axis for this point cluster.
double GetPrincipalEigenvalue();
double GetSecondEigenvalue();
uint32 GetPowerMethodIterations();
void GetPrincipalAxis(RGBADir &axis);
// Returns the principal axis for this point cluster.
double GetPrincipalEigenvalue();
double GetSecondEigenvalue();
uint32 GetPowerMethodIterations();
void GetPrincipalAxis(RGBADir &axis);
bool AllSamePoint() const { return m_Max == m_Min; }
int GetPointBitString() const { return m_PointBitString; }
bool AllSamePoint() const { return m_Max == m_Min; }
int GetPointBitString() const { return m_PointBitString; }
private:
// The number of points in the cluster.
uint32 m_NumPoints;
// The number of points in the cluster.
uint32 m_NumPoints;
RGBAVector m_Total;
RGBAVector m_Total;
// The points in the cluster.
RGBAVector m_DataPoints[kMaxNumDataPoints];
// The points in the cluster.
RGBAVector m_DataPoints[kMaxNumDataPoints];
int m_PointBitString;
RGBAVector m_Min, m_Max;
int m_PointBitString;
RGBAVector m_Min, m_Max;
bool m_PrincipalAxisCached;
double m_PrincipalEigenvalue;
double m_SecondEigenvalue;
uint32 m_PowerMethodIterations;
RGBADir m_PrincipalAxis;
bool m_PrincipalAxisCached;
double m_PrincipalEigenvalue;
double m_SecondEigenvalue;
uint32 m_PowerMethodIterations;
RGBADir m_PrincipalAxis;
};
extern uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit = -1);

View file

@ -92,37 +92,37 @@ static inline uint32 popcnt32(uint32 x) {
/* Original scalar implementation:
// If the mask is all the bits, then we can just return the value.
if(mask == 0xFF) {
return val;
}
// If the mask is all the bits, then we can just return the value.
if(mask == 0xFF) {
return val;
}
uint32 prec = CountBitsInMask(mask);
const uint32 step = 1 << (8 - prec);
uint32 prec = CountBitsInMask(mask);
const uint32 step = 1 << (8 - prec);
assert(step-1 == uint8(~mask));
assert(step-1 == uint8(~mask));
uint32 lval = val & mask;
uint32 hval = lval + step;
uint32 lval = val & mask;
uint32 hval = lval + step;
if(pBit >= 0) {
prec++;
lval |= !!(pBit) << (8 - prec);
hval |= !!(pBit) << (8 - prec);
}
if(pBit >= 0) {
prec++;
lval |= !!(pBit) << (8 - prec);
hval |= !!(pBit) << (8 - prec);
}
if(lval > val) {
lval -= step;
hval -= step;
}
if(lval > val) {
lval -= step;
hval -= step;
}
lval |= lval >> prec;
hval |= hval >> prec;
lval |= lval >> prec;
hval |= hval >> prec;
if(sad(val, lval) < sad(val, hval))
return lval;
else
return hval;
if(sad(val, lval) < sad(val, hval))
return lval;
else
return hval;
*/
// !TODO! AVX2 supports an instruction known as vsllv, which shifts a vector
@ -158,114 +158,114 @@ static const ALIGN_SSE uint32 kThirtyTwoVector[4] = { 32, 32, 32, 32 };
static const __m128i kByteValMask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF);
// Absolute difference of a and b computed per unsigned byte (max - min with
// unsigned saturation), then masked with kByteValMask so that only the low
// byte of each 32-bit lane is kept.
static inline __m128i sad(const __m128i &a, const __m128i &b) {
  const __m128i maxab = _mm_max_epu8(a, b);
  const __m128i minab = _mm_min_epu8(a, b);
  return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) );
}
// Quantizes the four channels of this vector against the per-lane bit mask
// qmask and returns the result as four 32-bit integers.
//
// For each lane, two candidates are built: the masked value (lval) and the
// masked value plus one quantization step (hval). The top bits of each
// candidate are replicated into its low bits, and the candidate closer to the
// integer channel value is chosen. Lanes whose mask equals 0xFF keep the
// integer channel value unchanged.
__m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {

  // !SPEED! We should figure out a way to get rid of these scalar operations.
  // prec is the number of set bits in the first lane of the mask.
#ifdef HAS_SSE_POPCNT
  const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
#else
  const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
#endif

  assert(r >= 0.0f && r <= 255.0f);
  assert(g >= 0.0f && g <= 255.0f);
  assert(b >= 0.0f && b <= 255.0f);
  assert(a >= 0.0f && a <= 255.0f);
  // The last lane must either be exact (0xFF) or use the same mask as the
  // first lane; the first three lanes must share a single mask.
  assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
  assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);

  // Add kHalfVector and convert the channels to 32-bit integers.
  const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );

  const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
  const __m128i &mask = qmask;

  // Lower and upper quantization candidates.
  __m128i lval = _mm_and_si128(val, mask);
  __m128i hval = _mm_add_epi32(lval, step);

  // Replicate the high bits into the low bits of each candidate.
  const __m128i lvalShift = _mm_srli_epi32(lval, prec);
  const __m128i hvalShift = _mm_srli_epi32(hval, prec);

  lval = _mm_or_si128(lval, lvalShift);
  hval = _mm_or_si128(hval, hvalShift);

  // Distance from val to each candidate.
  const __m128i lvald = _mm_sub_epi32( val, lval );
  const __m128i hvald = _mm_sub_epi32( hval, val );

  // Take lval where it is strictly closer, hval otherwise.
  const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
  __m128i ans = _mm_blendv_epi8(hval, lval, vd);

  // Lanes with a full 0xFF mask bypass quantization entirely.
  const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
  ans = _mm_blendv_epi8( ans, val, chanExact );
  return ans;
}
// Quantizes the four channels of this vector against the per-lane bit mask
// qmask with an extra p-bit appended below the masked bits, and returns the
// result as four 32-bit integers.
//
// pBit is treated as a boolean (!!pBit). The flow matches the overload
// without a p-bit, except that the p-bit is OR-ed into both candidates, the
// candidates are re-centered around the channel value afterwards, and the bit
// replication shifts by prec + 1.
__m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {

  // !SPEED! We should figure out a way to get rid of these scalar operations.
  // prec is the number of set bits in the first lane of the mask.
#ifdef HAS_SSE_POPCNT
  const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
#else
  const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
#endif

  assert(r >= 0.0f && r <= 255.0f);
  assert(g >= 0.0f && g <= 255.0f);
  assert(b >= 0.0f && b <= 255.0f);
  assert(a >= 0.0f && a <= 255.0f);
  // The last lane must either be exact (0xFF) or use the same mask as the
  // first lane; the first three lanes must share a single mask.
  assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
  assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);

  // Add kHalfVector and convert the channels to 32-bit integers.
  const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
  const __m128i pbit = _mm_set1_epi32(!!pBit);

  const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask);
  const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );

  // Lower and upper quantization candidates.
  __m128i lval = _mm_and_si128( val, mask );
  __m128i hval = _mm_add_epi32( lval, step );

  // Place the p-bit directly below the masked bits of both candidates.
  const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec);
  lval = _mm_or_si128(lval, pBitShifted );
  hval = _mm_or_si128(hval, pBitShifted);

  // These next three lines we make sure that after adding the pbit that val is
  // still in between lval and hval. If it isn't, then we subtract a
  // step from both. Now, val should be larger than lval and less than
  // hval, but certain situations make this not always the case (e.g. val
  // is 0, precision is 4 bits, and pbit is 1). Hence, we add back the
  // step if it goes below zero, making it equivalent to hval and so it
  // doesn't matter which we choose.
  {
    // A true comparison lane is all ones (-1), so cmp * step is -step there
    // and the additions below subtract one step from both candidates.
    __m128i cmp = _mm_cmpgt_epi32(lval, val);
    cmp = _mm_mullo_epi32(cmp, step);
    lval = _mm_add_epi32(lval, cmp);
    hval = _mm_add_epi32(hval, cmp);

    // Where lval went negative, subtracting -step adds the step back.
    cmp = _mm_cmplt_epi32(lval, kZeroVector);
    cmp = _mm_mullo_epi32(cmp, step);
    lval = _mm_sub_epi32(lval, cmp);
  }

  // Replicate the high bits (mask bits plus the p-bit) into the low bits.
  const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1);
  const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1);

  lval = _mm_or_si128(lval, lvalShift);
  hval = _mm_or_si128(hval, hvalShift);

  // Distance from val to each candidate.
  const __m128i lvald = _mm_sub_epi32( val, lval );
  const __m128i hvald = _mm_sub_epi32( hval, val );

  // Take lval where it is strictly closer, hval otherwise.
  const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
  __m128i ans = _mm_blendv_epi8(hval, lval, vd);

  // Lanes with a full 0xFF mask bypass quantization entirely.
  const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
  ans = _mm_blendv_epi8( ans, val, chanExact );
  return ans;
}
///////////////////////////////////////////////////////////////////////////////
@ -275,18 +275,18 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
///////////////////////////////////////////////////////////////////////////////
// Matrix * vector product: broadcasts each component of p, scales the
// corresponding entry of col[] by it, and sums the four scaled vectors.
RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const {

  __m128 xVec = _mm_set1_ps( p.x );
  __m128 yVec = _mm_set1_ps( p.y );
  __m128 zVec = _mm_set1_ps( p.z );
  __m128 wVec = _mm_set1_ps( p.w );

  __m128 vec1 = _mm_mul_ps( xVec, col[0] );
  __m128 vec2 = _mm_mul_ps( yVec, col[1] );
  __m128 vec3 = _mm_mul_ps( zVec, col[2] );
  __m128 vec4 = _mm_mul_ps( wVec, col[3] );

  return RGBAVectorSIMD( _mm_add_ps( _mm_add_ps( vec1, vec2 ), _mm_add_ps( vec3, vec4 ) ) );
}
///////////////////////////////////////////////////////////////////////////////
@ -297,104 +297,104 @@ RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const {
// Merge constructor: starts as a copy of `left`, then appends every point of
// `right`, updating the running total and the bounding box. The two clusters
// must not share any point index.
RGBAClusterSIMD::RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right) {

  assert(!(left.m_PointBitString & right.m_PointBitString));

  *this = left;
  for(int i = 0; i < right.m_NumPoints; i++) {

    const RGBAVectorSIMD &p = right.m_DataPoints[i];

    assert(m_NumPoints < kMaxNumDataPoints);
    m_Total += p;
    m_DataPoints[m_NumPoints++] = p;

    m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
    m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
  }

  m_PointBitString = left.m_PointBitString | right.m_PointBitString;
  // The point set changed, so any principal axis copied from `left` is stale.
  m_PrincipalAxisCached = false;
}
// Appends point p to the cluster, sets bit `idx` in the point bit string, and
// extends the running total and bounding box to include p.
void RGBAClusterSIMD::AddPoint(const RGBAVectorSIMD &p, int idx) {
  assert(m_NumPoints < kMaxNumDataPoints);
  m_Total += p;
  m_DataPoints[m_NumPoints++] = p;
  m_PointBitString |= 1 << idx;

  m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
  m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
}
float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2], __m128i *indices) const {
// nBuckets should be a power of two.
assert(!(nBuckets & (nBuckets - 1)));
// nBuckets should be a power of two.
assert(!(nBuckets & (nBuckets - 1)));
#ifdef HAS_SSE_POPCNT
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
#else
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
#endif
assert(indexPrec >= 2 && indexPrec <= 4);
assert(indexPrec >= 2 && indexPrec <= 4);
typedef __m128i tInterpPair[2];
typedef tInterpPair tInterpLevel[16];
const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1);
typedef __m128i tInterpPair[2];
typedef tInterpPair tInterpLevel[16];
const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1);
__m128i qp1, qp2;
if(pbits) {
qp1 = p1.ToPixel(bitMask, pbits[0]);
qp2 = p2.ToPixel(bitMask, pbits[1]);
}
else {
qp1 = p1.ToPixel(bitMask);
qp2 = p2.ToPixel(bitMask);
}
__m128i qp1, qp2;
if(pbits) {
qp1 = p1.ToPixel(bitMask, pbits[0]);
qp2 = p2.ToPixel(bitMask, pbits[1]);
}
else {
qp1 = p1.ToPixel(bitMask);
qp2 = p2.ToPixel(bitMask);
}
__m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() );
__m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() );
__m128 totalError = kZero;
for(int i = 0; i < m_NumPoints; i++) {
__m128 totalError = kZero;
for(int i = 0; i < m_NumPoints; i++) {
const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask );
const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask );
__m128 minError = _mm_set1_ps(FLT_MAX);
__m128i bestBucket = _mm_set1_epi32(-1);
for(int j = 0; j < nBuckets; j++) {
__m128 minError = _mm_set1_ps(FLT_MAX);
__m128i bestBucket = _mm_set1_epi32(-1);
for(int j = 0; j < nBuckets; j++) {
const __m128i jVec = _mm_set1_epi32(j);
const __m128i interp0 = (*interpVals)[j][0];
const __m128i interp1 = (*interpVals)[j][1];
const __m128i jVec = _mm_set1_epi32(j);
const __m128i interp0 = (*interpVals)[j][0];
const __m128i interp1 = (*interpVals)[j][1];
const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 );
const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 );
const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) );
const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel );
__m128 errorVec = _mm_cvtepi32_ps( dist );
errorVec = _mm_mul_ps( errorVec, errorMetricVec );
errorVec = _mm_mul_ps( errorVec, errorVec );
errorVec = _mm_hadd_ps( errorVec, errorVec );
errorVec = _mm_hadd_ps( errorVec, errorVec );
const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 );
const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 );
const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) );
const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel );
__m128 errorVec = _mm_cvtepi32_ps( dist );
errorVec = _mm_mul_ps( errorVec, errorMetricVec );
errorVec = _mm_mul_ps( errorVec, errorVec );
errorVec = _mm_hadd_ps( errorVec, errorVec );
errorVec = _mm_hadd_ps( errorVec, errorVec );
const __m128 cmp = _mm_cmple_ps( errorVec, minError );
minError = _mm_blendv_ps( minError, errorVec, cmp );
bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) );
const __m128 cmp = _mm_cmple_ps( errorVec, minError );
minError = _mm_blendv_ps( minError, errorVec, cmp );
bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) );
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
// farther away from the reference point along the line). Hence we can early out here.
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
// about 0.01 RMS error.
if(!((uint8 *)(&cmp))[0])
break;
}
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
// farther away from the reference point along the line). Hence we can early out here.
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
// about 0.01 RMS error.
if(!((uint8 *)(&cmp))[0])
break;
}
totalError = _mm_add_ps(totalError, minError);
if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0];
}
totalError = _mm_add_ps(totalError, minError);
if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0];
}
return ((float *)(&totalError))[0];
return ((float *)(&totalError))[0];
}
///////////////////////////////////////////////////////////////////////////////
@ -404,69 +404,69 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector
///////////////////////////////////////////////////////////////////////////////
// Clamps every channel of both endpoints, in place, to the range
// [kZero, kByteMax].
void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2) {
  p1.vec = _mm_min_ps( kByteMax, _mm_max_ps( p1.vec, kZero ) );
  p2.vec = _mm_min_ps( kByteMax, _mm_max_ps( p2.vec, kZero ) );
}
// Estimates the principal axis of the points in cluster c and stores it in
// `axis`.
//
// A two-point cluster uses the difference of its points directly. Otherwise
// the covariance matrix of the points is built and eight power-method
// iterations are run on it.
void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis) {

  if(c.GetNumPoints() == 2) {
    axis = c.GetPoint(1) - c.GetPoint(0);
    return;
  }

  RGBAVectorSIMD avg = c.GetTotal();
  avg /= float(c.GetNumPoints());

  // We use these vectors for calculating the covariance matrix...
  RGBAVectorSIMD toPts[kMaxNumDataPoints];
  // Per-channel maximum of the centered points; used as the starting vector
  // of the power iteration.
  RGBAVectorSIMD toPtsMax(-FLT_MAX);
  for(int i = 0; i < c.GetNumPoints(); i++) {
    toPts[i] = c.GetPoint(i) - avg;
    toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec);
  }

  RGBAMatrixSIMD covMatrix;

  // Compute covariance. Only the lower triangle is summed; each entry is
  // mirrored into the upper triangle.
  const float fNumPoints = float(c.GetNumPoints());
  for(int i = 0; i < kNumColorChannels; i++) {
    for(int j = 0; j <= i; j++) {

      float sum = 0.0;
      for(int k = 0; k < c.GetNumPoints(); k++) {
        sum += toPts[k].c[i] * toPts[k].c[j];
      }

      covMatrix(i, j) = sum / fNumPoints;
      covMatrix(j, i) = covMatrix(i, j);
    }
  }

  // !SPEED! Find eigenvectors by using the power method. This is good because the
  // matrix is only 4x4, which allows us to use SIMD...
  RGBAVectorSIMD b = toPtsMax;
  assert(b.Length() > 0);
  b /= b.Length();

  RGBAVectorSIMD newB = covMatrix * b;

  // !HACK! If the principal eigenvector of the covariance matrix
  // converges to zero, that means that the points lie equally
  // spaced on a sphere in this space. In this (extremely rare)
  // situation, just choose a point and use it as the principal
  // direction.
  const float newBlen = newB.Length();
  if(newBlen < 1e-10) {
    axis = toPts[0];
    return;
  }

  // Fixed number of power iterations; no convergence test is performed.
  for(int i = 0; i < 8; i++) {
    newB = covMatrix * b;
    newB.Normalize();
    b = newB;
  }

  axis = b;
}

View file

@ -81,270 +81,270 @@ static const __m128 kEpsilonSIMD = _mm_set1_ps(1e-8f);
class RGBAVectorSIMD {
public:
// Storage for the four channels. The same 16 bytes can be addressed as named
// color channels (r, g, b, a), as coordinates (x, y, z, w), as an array c[4],
// or as a single SSE register value vec.
union {
  struct { float r, g, b, a; };
  struct { float x, y, z, w; };
  float c[4];
  __m128 vec;
};
// Default constructor: every channel is set to -1.
RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }

// Unpacks a 32-bit pixel: r from bits 0-7, g from bits 8-15, b from
// bits 16-23 and a from bits 24-31, each converted to float.
RGBAVectorSIMD(uint32 pixel) :
  r(float(pixel & 0xFF)),
  g(float((pixel >> 8) & 0xFF)),
  b(float((pixel >> 16) & 0xFF)),
  a(float((pixel >> 24) & 0xFF))
{ }

// Sets the four channels individually.
explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) :
  r(_r), g(_g), b(_b), a(_a) { }

// Sets all four channels to the same value cc.
explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { }

// Wraps an existing SSE register value / copies another vector.
RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { }
RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { }
// Returns the component-wise sum of this vector and p.
RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const {
  return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) );
}
// Adds p to this vector component-wise, in place, and returns *this.
RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) {
  this->vec = _mm_add_ps(this->vec, p.vec);
  return *this;
}
// Returns the component-wise difference (this - p).
RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const {
  return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) );
}
// Subtracts p from this vector component-wise, in place, and returns *this.
RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) {
  this->vec = _mm_sub_ps(this->vec, p.vec);
  return *this;
}
// Returns this vector with every component divided by the scalar s.
// No check is made for s == 0.
RGBAVectorSIMD operator /(const float s) const {
  return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) );
}
// Divides every component by the scalar s in place and returns *this.
// No check is made for s == 0.
RGBAVectorSIMD &operator /=(const float s) {
  this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) );
  return *this;
}
// Dot product: multiplies the two vectors lane by lane and returns the sum
// of the four products.
float operator *(const RGBAVectorSIMD &p) const {
  __m128 mul = _mm_mul_ps(this->vec, p.vec);
  // Two horizontal adds leave the sum of all four lanes in every lane.
  mul = _mm_hadd_ps(mul, mul);
  mul = _mm_hadd_ps(mul, mul);
  // Read lane 0 with the dedicated intrinsic instead of a pointer cast.
  return _mm_cvtss_f32(mul);
}
// Scales this vector in place by the reciprocal square root of its squared
// length (the dot product with itself). _mm_rsqrt_ps is an approximate
// reciprocal square root, so the result is unit length only to that
// approximation.
void Normalize() {
  __m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) );
  vec = _mm_mul_ps( vec, rsqrt );
}
// Euclidean length over all four lanes: the square root of the dot product
// of this vector with itself.
float Length() const {
  return sqrt((*this) * (*this));
}
// Lane-wise (component-by-component) multiplication by v, in place.
// Unlike the binary operator * on two vectors, this is not a dot product.
RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) {
  this->vec = _mm_mul_ps(this->vec, v.vec);
  return *this;
}
// Multiplies every lane by the scalar s and returns the result as a new vector.
RGBAVectorSIMD operator *(const float s) const {
  return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) );
}
// Scalar-on-the-left form of scalar multiplication (s * p); same result as
// p * s.
friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) {
  return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) );
}
// Multiplies every lane by the scalar s in place; returns *this.
RGBAVectorSIMD &operator *=(const float s) {
  this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) );
  return *this;
}
// Returns a mutable reference to element i of the c array.
// No bounds check is performed on i.
float &operator [](const int i) {
  return c[i];
}
// Approximate equality: true when, in every lane, the squared difference
// between the two vectors does not exceed kEpsilonSIMD.
friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
  __m128 d = _mm_sub_ps(rhs.vec, lhs.vec);
  d = _mm_mul_ps(d, d);
  __m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
  // Each lane of cmp is all-ones where the tolerance is exceeded and zero
  // otherwise. Test the mask bits directly rather than summing the masks as
  // floats, which only worked because an all-ones lane is a NaN.
  return _mm_movemask_ps(cmp) == 0;
}
// Negation of operator ==, so it uses the same per-lane tolerance.
friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
  return !(rhs == lhs);
}
// Implicit conversion to a pointer to the first element of the c array,
// giving callers raw mutable access to the channel values.
operator float *() {
  return c;
}
// Quantize this point. Both overloads are only declared here; their
// definitions are outside this class body.
__m128i ToPixel(const __m128i &channelMask, const int pBit) const;
__m128i ToPixel(const __m128i &channelMask) const;
};
class RGBAMatrixSIMD {
private:
union {
float m[kNumColorChannels*kNumColorChannels];
struct {
float m1, m5, m9, m13;
float m2, m6, m10, m14;
float m3, m7, m11, m15;
float m4, m8, m12, m16;
};
__m128 col[kNumColorChannels];
};
union {
float m[kNumColorChannels*kNumColorChannels];
struct {
float m1, m5, m9, m13;
float m2, m6, m10, m14;
float m3, m7, m11, m15;
float m4, m8, m12, m16;
};
__m128 col[kNumColorChannels];
};
RGBAMatrixSIMD(const float *arr) {
memcpy(m, arr, sizeof(m));
}
RGBAMatrixSIMD(const float *arr) {
memcpy(m, arr, sizeof(m));
}
RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) {
for(int i = 0; i < kNumColorChannels; i++)
col[i] = newcol[i];
}
RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) {
for(int i = 0; i < kNumColorChannels; i++)
col[i] = newcol[i];
}
public:
RGBAMatrixSIMD() :
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
{ }
RGBAMatrixSIMD() :
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
{ }
RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) {
memcpy(m, other.m, sizeof(m));
return (*this);
}
RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) {
memcpy(m, other.m, sizeof(m));
return (*this);
}
RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const {
RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_add_ps(col[i], p.col[i]);
}
return newm;
}
RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const {
RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_add_ps(col[i], p.col[i]);
}
return newm;
}
RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) {
for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_add_ps( col[i], p.col[i] );
}
return *this;
}
RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) {
for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_add_ps( col[i], p.col[i] );
}
return *this;
}
RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const {
RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_sub_ps( col[i], p.col[i] );
}
return newm;
}
RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const {
RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_sub_ps( col[i], p.col[i] );
}
return newm;
}
RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) {
for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_sub_ps( col[i], p.col[i] );
}
return *this;
}
RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) {
for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_sub_ps( col[i], p.col[i] );
}
return *this;
}
RGBAMatrixSIMD operator /(const float s) const {
__m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD newm;
RGBAMatrixSIMD operator /(const float s) const {
__m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_div_ps( col[i], f );
}
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_div_ps( col[i], f );
}
return newm;
}
return newm;
}
RGBAMatrixSIMD &operator /=(const float s) {
RGBAMatrixSIMD &operator /=(const float s) {
__m128 f = _mm_set1_ps(s);
__m128 f = _mm_set1_ps(s);
for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_div_ps(col[i], f);
}
for(int i = 0; i < kNumColorChannels; i++) {
col[i] = _mm_div_ps(col[i], f);
}
return *this;
}
return *this;
}
RGBAMatrixSIMD operator *(const float s) const {
__m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD operator *(const float s) const {
__m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_mul_ps( col[i], f );
}
return newm;
}
RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_mul_ps( col[i], f );
}
return newm;
}
friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) {
__m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD newm;
friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) {
__m128 f = _mm_set1_ps(s);
RGBAMatrixSIMD newm;
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_mul_ps( p.col[i], f );
}
return newm;
}
for(int i = 0; i < kNumColorChannels; i++) {
newm.col[i] = _mm_mul_ps( p.col[i], f );
}
return newm;
}
RGBAMatrixSIMD &operator *=(const float s) {
__m128 f = _mm_set1_ps(s);
for(int i = 0; i < kNumColorChannels; i++)
col[i] = _mm_mul_ps(col[i], f);
return *this;
}
RGBAMatrixSIMD &operator *=(const float s) {
__m128 f = _mm_set1_ps(s);
for(int i = 0; i < kNumColorChannels; i++)
col[i] = _mm_mul_ps(col[i], f);
return *this;
}
float &operator ()(const int i, const int j) {
return (*this)[j*4 + i];
}
float &operator ()(const int i, const int j) {
return (*this)[j*4 + i];
}
float &operator [](const int i) {
return m[i];
}
float &operator [](const int i) {
return m[i];
}
friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) {
__m128 sum = _mm_set1_ps(0.0f);
for(int i = 0; i < kNumColorChannels; i++) {
__m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]);
d = _mm_mul_ps(d, d);
__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
cmp = _mm_hadd_ps(cmp, cmp);
cmp = _mm_hadd_ps(cmp, cmp);
sum = _mm_add_ps(sum, cmp);
}
friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) {
__m128 sum = _mm_set1_ps(0.0f);
for(int i = 0; i < kNumColorChannels; i++) {
__m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]);
d = _mm_mul_ps(d, d);
__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
cmp = _mm_hadd_ps(cmp, cmp);
cmp = _mm_hadd_ps(cmp, cmp);
sum = _mm_add_ps(sum, cmp);
}
if(((float *)(&sum))[0] != 0)
return false;
else
return true;
}
if(((float *)(&sum))[0] != 0)
return false;
else
return true;
}
operator float *() {
return m;
}
operator float *() {
return m;
}
RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const;
RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const;
};
// A direction vector: an RGBAVectorSIMD that is normalized when constructed
// from another vector.
class RGBADirSIMD : public RGBAVectorSIMD {
public:
  // Default construction forwards to the base default constructor and does
  // not normalize.
  RGBADirSIMD() : RGBAVectorSIMD() { }

  // Copies p and then normalizes the copy. Not explicit, so any
  // RGBAVectorSIMD converts implicitly to a direction.
  RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) {
    this->Normalize();
  }
};
// Makes sure that the values of the endpoints lie between 0 and 1.
@ -353,69 +353,69 @@ extern void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2);
class RGBAClusterSIMD {
public:
RGBAClusterSIMD() :
m_NumPoints(0), m_Total(0.0f),
m_PointBitString(0),
m_Min(FLT_MAX),
m_Max(-FLT_MAX),
m_PrincipalAxisCached(false)
{ }
RGBAClusterSIMD() :
m_NumPoints(0), m_Total(0.0f),
m_PointBitString(0),
m_Min(FLT_MAX),
m_Max(-FLT_MAX),
m_PrincipalAxisCached(false)
{ }
RGBAClusterSIMD(const RGBAClusterSIMD &c) :
m_NumPoints(c.m_NumPoints),
m_Total(c.m_Total),
m_PointBitString(c.m_PointBitString),
m_Min(c.m_Min),
m_Max(c.m_Max),
m_PrincipalAxisCached(false)
{
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD));
}
RGBAClusterSIMD(const RGBAClusterSIMD &c) :
m_NumPoints(c.m_NumPoints),
m_Total(c.m_Total),
m_PointBitString(c.m_PointBitString),
m_Min(c.m_Min),
m_Max(c.m_Max),
m_PrincipalAxisCached(false)
{
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD));
}
RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right);
RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) :
m_NumPoints(1),
m_Total(p),
m_PointBitString(0),
m_Min(p), m_Max(p),
m_PrincipalAxisCached(false)
{
m_DataPoints[0] = p;
m_PointBitString |= (1 << idx);
}
RGBAVectorSIMD GetTotal() const { return m_Total; }
const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; }
int GetNumPoints() const { return m_NumPoints; }
RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); }
RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right);
RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) :
m_NumPoints(1),
m_Total(p),
m_PointBitString(0),
m_Min(p), m_Max(p),
m_PrincipalAxisCached(false)
{
m_DataPoints[0] = p;
m_PointBitString |= (1 << idx);
}
RGBAVectorSIMD GetTotal() const { return m_Total; }
const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; }
int GetNumPoints() const { return m_NumPoints; }
RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); }
void AddPoint(const RGBAVectorSIMD &p, int idx);
void AddPoint(const RGBAVectorSIMD &p, int idx);
void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const {
Min = m_Min, Max = m_Max;
}
void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const {
Min = m_Min, Max = m_Max;
}
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const;
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const;
bool AllSamePoint() const { return m_Max == m_Min; }
int GetPointBitString() const { return m_PointBitString; }
bool AllSamePoint() const { return m_Max == m_Min; }
int GetPointBitString() const { return m_PointBitString; }
private:
// The number of points in the cluster.
int m_NumPoints;
// The number of points in the cluster.
int m_NumPoints;
RGBAVectorSIMD m_Total;
RGBAVectorSIMD m_Total;
// The points in the cluster.
RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints];
// The points in the cluster.
RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints];
RGBAVectorSIMD m_Min, m_Max;
int m_PointBitString;
RGBAVectorSIMD m_Min, m_Max;
int m_PointBitString;
RGBADirSIMD m_PrincipalAxis;
bool m_PrincipalAxisCached;
RGBADirSIMD m_PrincipalAxis;
bool m_PrincipalAxisCached;
};
extern void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis);

View file

@ -64,18 +64,18 @@ void PrintUsage() {
}
// Writes the part of `filename` that precedes its last '.' into `buf` as a
// NUL-terminated string, truncated to at most bufSz - 1 characters.
//
// A '.' in the very first position is not treated as a separator. When the
// name contains no separator, the whole name is copied (truncated to fit) so
// that `buf` always holds a valid string on return.
//
// Nothing is written when `buf` is NULL or `bufSz` is 0. A NULL `filename`
// yields an empty string.
void ExtractBasename(const char *filename, char *buf, uint32 bufSz) {
  if(buf == NULL || bufSz == 0)
    return;  // no room even for the terminator

  buf[0] = '\0';
  if(filename == NULL)
    return;

  const size_t len = strlen(filename);

  // Scan backwards over indices len-1 .. 1 for the last '.'. Index-based
  // scanning never forms a pointer before the start of the string, even when
  // the name is empty.
  size_t baseLen = len;
  for(size_t i = len; i > 1; i--) {
    if(filename[i - 1] == '.') {
      baseLen = i - 1;
      break;
    }
  }

  // Leave one byte for the terminator.
  const size_t maxChars = size_t(bufSz) - 1;
  const size_t toCopy = (baseLen > maxChars)? maxChars : baseLen;
  memcpy(buf, filename, toCopy);
  buf[toCopy] = '\0';
}
int _tmain(int argc, _TCHAR* argv[])
@ -175,7 +175,7 @@ int _tmain(int argc, _TCHAR* argv[])
if(numThreads > 1 && bSaveLog) {
bSaveLog = false;
fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
}
if(fileArg == argc) {
@ -183,16 +183,16 @@ int _tmain(int argc, _TCHAR* argv[])
exit(1);
}
char basename[256];
ExtractBasename(argv[fileArg], basename, 256);
char basename[256];
ExtractBasename(argv[fileArg], basename, 256);
ImageFile file (argv[fileArg]);
if(!file.Load()) {
if(!file.Load()) {
fprintf(stderr, "Error loading file: %s\n", argv[fileArg]);
return 1;
}
}
const Image *img = file.GetImage();
const Image *img = file.GetImage();
int numBlocks = (img->GetWidth() * img->GetHeight())/16;
BlockStatManager *statManager = NULL;
@ -224,14 +224,14 @@ int _tmain(int argc, _TCHAR* argv[])
}
if(bSaveLog) {
strcat_s(basename, ".log");
strcat_s(basename, ".log");
statManager->ToFile(basename);
basename[strlen(basename) - 4] = '\0';
basename[strlen(basename) - 4] = '\0';
}
strcat_s(basename, "-bc7.png");
Image cImg (*ci);
ImageFile cImgFile (basename, eFileFormat_PNG, cImg);
cImgFile.Write();
Image cImg (*ci);
ImageFile cImgFile (basename, eFileFormat_PNG, cImg);
cImgFile.Write();
// Cleanup
delete ci;

View file

@ -54,7 +54,7 @@ class ImageLoader;
class Image {
public:
Image(const CompressedImage &);
Image(const CompressedImage &);
Image(const ImageLoader &);
~Image();

View file

@ -165,8 +165,8 @@ BlockStatManager::~BlockStatManager() {
if(m_Mutex)
{
delete m_Mutex;
m_Mutex = 0;
delete m_Mutex;
m_Mutex = 0;
}
}
@ -206,15 +206,15 @@ void BlockStatManager::ToFile(const CHAR *filename) {
CHAR str[256];
#ifdef _MSC_VER
_sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr);
_sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr);
#else
snprintf(str, 256, "%d,%s\n", i, statStr);
#endif
uint32 strLen = uint32(strlen(str));
if(strLen > 255) {
str[255] = '\n';
strLen = 256;
str[255] = '\n';
strLen = 256;
}
fstr.Write((uint8 *)str, strLen);

View file

@ -70,16 +70,16 @@ CompressedImage::CompressedImage( const CompressedImage &other )
}
// Constructs a compressed image of the given dimensions and format.
// The data pointer and size start out as zero; InitData(data) is then called
// to set up the image's own buffer from `data`.
CompressedImage::CompressedImage(
  const unsigned int width,
  const unsigned int height,
  const ECompressionFormat format,
  const unsigned char *data
)
  : m_Width(width)
  , m_Height(height)
  , m_Format(format)
  , m_Data(0)
  , m_DataSz(0)
{
  InitData(data);
}
@ -94,7 +94,7 @@ void CompressedImage::InitData(const unsigned char *withData) {
case eCompressionFormat_DXT5: m_DataSz = uncompDataSz / 4; break;
case eCompressionFormat_BPTC: m_DataSz = uncompDataSz / 4; break;
}
if(m_DataSz > 0) {
m_Data = new unsigned char[m_DataSz];
memcpy(m_Data, withData, m_DataSz);

View file

@ -95,14 +95,14 @@ Image::Image(const CompressedImage &ci)
: m_Width(ci.GetWidth())
, m_Height(ci.GetHeight())
{
unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4;
m_PixelData = new uint8[ bufSz ];
if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; }
unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4;
m_PixelData = new uint8[ bufSz ];
if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; }
if(!ci.DecompressImage(m_PixelData, bufSz)) {
fprintf(stderr, "Error decompressing image!\n");
return;
}
if(!ci.DecompressImage(m_PixelData, bufSz)) {
fprintf(stderr, "Error decompressing image!\n");
return;
}
}
Image::Image(const ImageLoader &loader)

View file

@ -74,23 +74,23 @@ class StopWatchImpl;
// Timer interface. All state lives behind the `impl` pointer; only the
// forward-declared StopWatchImpl knows how time is measured.
class StopWatch
{
public:
  StopWatch();
  StopWatch(const StopWatch &);

  ~StopWatch();

  StopWatch &operator=(const StopWatch &);

  void Start();
  void Stop();
  void Reset();

  // The measured time, expressed in three different units.
  double TimeInSeconds() const;
  double TimeInMilliseconds() const;
  double TimeInMicroseconds() const;

private:
  // Implementation object; defined elsewhere.
  StopWatchImpl *impl;
};
#endif // __TEXCOMP_STOP_WATCH_H__

View file

@ -101,7 +101,7 @@ double StopWatch::TimeInSeconds() const {
// Returns impl->duration converted to double and divided by 1e3.
double StopWatch::TimeInMilliseconds() const {
  const double elapsed = double(impl->duration);
  return elapsed / 1e3;
}
// Returns impl->duration converted to double, with no scaling applied.
double StopWatch::TimeInMicroseconds() const {
  const double elapsed = double(impl->duration);
  return elapsed;
}

View file

@ -100,7 +100,7 @@ double StopWatch::TimeInSeconds() const {
// Returns impl->duration multiplied by 1000.
double StopWatch::TimeInMilliseconds() const {
  const double scaled = impl->duration * 1000;
  return scaled;
}
// Returns impl->duration multiplied by 1000000.
double StopWatch::TimeInMicroseconds() const {
  const double scaled = impl->duration * 1000000;
  return scaled;
}

View file

@ -404,5 +404,5 @@ bool CompressImageData(
}
// Free-function wrapper that forwards to TCThread::Yield() exactly once.
void YieldThread() {
  TCThread::Yield();
}

View file

@ -115,7 +115,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
, m_CompressedBlockSize(
(func == BC7C::Compress
#ifdef HAS_SSE_41
|| func == BC7C::CompressImageBC7SIMD
|| func == BC7C::CompressImageBC7SIMD
#endif
)?
16
@ -125,7 +125,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
, m_UncompressedBlockSize(
(func == BC7C::Compress
#ifdef HAS_SSE_41
|| func == BC7C::CompressImageBC7SIMD
|| func == BC7C::CompressImageBC7SIMD
#endif
)?
64

View file

@ -81,40 +81,39 @@ void WorkerThread::operator()() {
bool quitFlag = false;
while(!quitFlag) {
switch(m_Parent->AcceptThreadData(m_ThreadIdx))
{
switch(m_Parent->AcceptThreadData(m_ThreadIdx)) {
case eAction_Quit:
{
quitFlag = true;
break;
quitFlag = true;
break;
}
case eAction_Wait:
{
TCThread::Yield();
break;
TCThread::Yield();
break;
}
case eAction_DoWork:
{
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
if(f)
(*f)(cj);
else
(*fStat)(cj, *statManager);
CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
if(f)
(*f)(cj);
else
(*fStat)(cj, *statManager);
break;
break;
}
default:
{
fprintf(stderr, "Unrecognized thread command!\n");
quitFlag = true;
break;
fprintf(stderr, "Unrecognized thread command!\n");
quitFlag = true;
break;
}
}
}
@ -244,10 +243,10 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
if(m_NextBlock == totalBlocks) {
if(m_NumCompressions < m_TotalNumCompressions) {
if(++m_WaitingThreads == m_ActiveThreads) {
m_NextBlock = 0;
m_WaitingThreads = 0;
m_NextBlock = 0;
m_WaitingThreads = 0;
} else {
return WorkerThread::eAction_Wait;
return WorkerThread::eAction_Wait;
}
}
else {

View file

@ -1,3 +1,55 @@
/* FasTC
* Copyright (c) 2012 University of North Carolina at Chapel Hill.
* All rights reserved.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for educational, research, and non-profit purposes, without
* fee, and without a written agreement is hereby granted, provided that the
* above copyright notice, this paragraph, and the following four paragraphs
* appear in all copies.
*
* Permission to incorporate this software into commercial products may be
* obtained by contacting the authors or the Office of Technology Development
* at the University of North Carolina at Chapel Hill <otd@unc.edu>.
*
* This software program and documentation are copyrighted by the University of
* North Carolina at Chapel Hill. The software program and documentation are
* supplied "as is," without any accompanying services from the University of
* North Carolina at Chapel Hill or the authors. The University of North
* Carolina at Chapel Hill and the authors do not warrant that the operation of
* the program will be uninterrupted or error-free. The end-user understands
* that the program was developed for research purposes and is advised not to
* rely exclusively on the program for any reason.
*
* IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
* AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
* THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
* AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
* DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY
* STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
* AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
* THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
* ENHANCEMENTS, OR MODIFICATIONS.
*
* Please send all BUG REPORTS to <pavel@cs.unc.edu>.
*
* The authors may be contacted via:
*
* Pavel Krajcevski
* Dept of Computer Science
* 201 S Columbia St
* Frederick P. Brooks, Jr. Computer Science Bldg
* Chapel Hill, NC 27599-3175
* USA
*
* <http://gamma.cs.unc.edu/FasTC/>
*/
#include "FileStream.h"
#include <Windows.h>
@ -54,7 +106,7 @@ public:
: m_ReferenceCount(1)
{
DWORD dwDesiredAccess = GENERIC_READ;
DWORD dwDesiredAccess = GENERIC_READ;
DWORD dwOpenAction = OPEN_EXISTING;
switch(mode) {
default:
@ -71,13 +123,13 @@ public:
case eFileMode_WriteAppend:
case eFileMode_WriteBinaryAppend:
dwDesiredAccess = FILE_APPEND_DATA;
dwDesiredAccess = FILE_APPEND_DATA;
dwOpenAction = CREATE_NEW;
break;
}
m_Handle = CreateFile(filename, dwDesiredAccess, 0, NULL, dwOpenAction, FILE_ATTRIBUTE_NORMAL, NULL);
if(m_Handle == INVALID_HANDLE_VALUE) {
if(m_Handle == INVALID_HANDLE_VALUE) {
ErrorExit(TEXT("CreateFile"));
}
}
@ -145,15 +197,14 @@ FileStream::~FileStream() {
int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
if(
m_Mode == eFileMode_Write ||
if(m_Mode == eFileMode_Write ||
m_Mode == eFileMode_WriteBinary ||
m_Mode == eFileMode_WriteAppend ||
m_Mode == eFileMode_WriteBinaryAppend
) {
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename);
OutputDebugString(errStr);
_sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename);
OutputDebugString(errStr);
return -2;
}
@ -163,27 +214,27 @@ int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
DWORD oldPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
if(INVALID_SET_FILE_POINTER == oldPosition) {
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr);
return -1;
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr);
return -1;
}
DWORD amtRead;
BOOL success = ReadFile(fp, buf, bufSz, &amtRead, NULL);
if(!success) {
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename);
OutputDebugString(errStr);
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename);
OutputDebugString(errStr);
return -1;
}
DWORD newPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
if(INVALID_SET_FILE_POINTER == newPosition) {
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr);
return -1;
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr);
return -1;
}
return newPosition - oldPosition;
@ -194,9 +245,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
m_Mode == eFileMode_Read ||
m_Mode == eFileMode_ReadBinary
) {
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename);
OutputDebugString(errStr);
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename);
OutputDebugString(errStr);
return -2;
}
@ -213,10 +264,10 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
}
if(INVALID_SET_FILE_POINTER == dwPos) {
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr);
return -1;
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr);
return -1;
}
while(!LockFile(fp, dwPos, 0, bufSz, 0)) Sleep(1);
@ -227,9 +278,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
UnlockFile(fp, dwPos, 0, bufSz, 0);
if(!success) {
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename);
OutputDebugString(errStr);
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename);
OutputDebugString(errStr);
return -1;
}
@ -244,17 +295,17 @@ int32 FileStream::Tell() {
DWORD pos = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
if(INVALID_SET_FILE_POINTER == pos) {
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr);
return -1;
CHAR errStr[256];
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
OutputDebugString(errStr);
return -1;
}
return pos;
}
bool FileStream::Seek(uint32 offset, ESeekPosition pos) {
// We cannot seek in append mode.
if(m_Mode == eFileMode_WriteAppend || m_Mode == eFileMode_WriteBinaryAppend)
return false;
@ -264,17 +315,17 @@ bool FileStream::Seek(uint32 offset, ESeekPosition pos) {
DWORD origin = FILE_BEGIN;
switch(pos) {
default:
case eSeekPosition_Beginning:
// Do nothing
default:
case eSeekPosition_Beginning:
// Do nothing
break;
case eSeekPosition_Current:
origin = FILE_CURRENT;
case eSeekPosition_Current:
origin = FILE_CURRENT;
break;
case eSeekPosition_End:
origin = FILE_END;
case eSeekPosition_End:
origin = FILE_END;
break;
}

View file

@ -125,7 +125,7 @@ bool ImageFile::Load() {
delete m_Image;
m_Image = NULL;
}
unsigned char *rawData = ReadFileData(m_Filename);
if(rawData) {
m_Image = LoadImage(rawData);
@ -272,7 +272,7 @@ unsigned char *ImageFile::ReadFileData(const CHAR *filename) {
bool ImageFile::WriteImageDataToFile(const uint8 *data,
const uint32 dataSz,
const CHAR *filename) {
// Open a file stream and write out the data...
FileStream fstr (filename, eFileMode_WriteBinary);
if(fstr.Tell() < 0) {

View file

@ -172,54 +172,54 @@ bool ImageLoader::LoadImage() {
// For each block, visit the pixels in sequential order
for(uint32 y = i; y < i+4; y++) {
for(uint32 x = j; x < j+4; x++) {
for(uint32 x = j; x < j+4; x++) {
if(y >= m_Height || x >= m_Width) {
m_PixelData[byteIdx++] = 0; // r
m_PixelData[byteIdx++] = 0; // g
m_PixelData[byteIdx++] = 0; // b
m_PixelData[byteIdx++] = 0; // a
continue;
}
if(y >= m_Height || x >= m_Width) {
m_PixelData[byteIdx++] = 0; // r
m_PixelData[byteIdx++] = 0; // g
m_PixelData[byteIdx++] = 0; // b
m_PixelData[byteIdx++] = 0; // a
continue;
}
unsigned int redVal = GetChannelForPixel(x, y, 0);
if(redVal == INT_MAX)
return false;
unsigned int redVal = GetChannelForPixel(x, y, 0);
if(redVal == INT_MAX)
return false;
unsigned int greenVal = redVal;
unsigned int blueVal = redVal;
unsigned int greenVal = redVal;
unsigned int blueVal = redVal;
if(GetGreenChannelPrecision() > 0) {
greenVal = GetChannelForPixel(x, y, 1);
if(greenVal == INT_MAX)
return false;
}
if(GetGreenChannelPrecision() > 0) {
greenVal = GetChannelForPixel(x, y, 1);
if(greenVal == INT_MAX)
return false;
}
if(GetBlueChannelPrecision() > 0) {
blueVal = GetChannelForPixel(x, y, 2);
if(blueVal == INT_MAX)
return false;
}
if(GetBlueChannelPrecision() > 0) {
blueVal = GetChannelForPixel(x, y, 2);
if(blueVal == INT_MAX)
return false;
}
unsigned int alphaVal = 0xFF;
if(GetAlphaChannelPrecision() > 0) {
alphaVal = GetChannelForPixel(x, y, 3);
if(alphaVal == INT_MAX)
return false;
}
unsigned int alphaVal = 0xFF;
if(GetAlphaChannelPrecision() > 0) {
alphaVal = GetChannelForPixel(x, y, 3);
if(alphaVal == INT_MAX)
return false;
}
// Red channel
m_PixelData[byteIdx++] = redVal & 0xFF;
// Red channel
m_PixelData[byteIdx++] = redVal & 0xFF;
// Green channel
m_PixelData[byteIdx++] = greenVal & 0xFF;
// Green channel
m_PixelData[byteIdx++] = greenVal & 0xFF;
// Blue channel
m_PixelData[byteIdx++] = blueVal & 0xFF;
// Blue channel
m_PixelData[byteIdx++] = blueVal & 0xFF;
// Alpha channel
m_PixelData[byteIdx++] = alphaVal & 0xFF;
}
// Alpha channel
m_PixelData[byteIdx++] = alphaVal & 0xFF;
}
}
}
}

View file

@ -54,10 +54,8 @@ static void ReportError(const char *msg) {
class PNGStreamReader {
public:
static void ReadDataFromStream(
png_structp png_ptr,
png_bytep outBytes,
png_size_t byteCountToRead
static void ReadDataFromStream(png_structp png_ptr,
png_bytep outBytes, png_size_t byteCountToRead
) {
png_voidp io_ptr = png_get_io_ptr( png_ptr );
if( io_ptr == NULL ) {
@ -120,9 +118,9 @@ bool ImageLoaderPNG::ReadData() {
int colorType = -1;
if( 1 != png_get_IHDR(png_ptr, info_ptr,
(png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height),
&bitDepth, &colorType,
NULL, NULL, NULL)
(png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height),
&bitDepth, &colorType,
NULL, NULL, NULL)
) {
ReportError("Could not read PNG header");
png_destroy_read_struct(&png_ptr, NULL, NULL);
@ -140,33 +138,33 @@ bool ImageLoaderPNG::ReadData() {
png_bytep rowData = new png_byte[bpr];
switch(colorType) {
default:
case PNG_COLOR_TYPE_PALETTE:
ReportError("PNG color type unsupported");
png_destroy_read_struct(&png_ptr, NULL, NULL);
return false;
default:
case PNG_COLOR_TYPE_PALETTE:
ReportError("PNG color type unsupported");
png_destroy_read_struct(&png_ptr, NULL, NULL);
return false;
case PNG_COLOR_TYPE_GRAY: {
case PNG_COLOR_TYPE_GRAY: {
m_RedChannelPrecision = bitDepth;
m_RedData = new unsigned char[numPixels];
for(uint32 i = 0; i < m_Height; i++) {
png_read_row(png_ptr, rowData, NULL);
png_read_row(png_ptr, rowData, NULL);
unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++];
}
unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++];
}
assert(byteIdx == bpr);
assert(byteIdx == bpr);
}
}
break;
case PNG_COLOR_TYPE_RGB:
case PNG_COLOR_TYPE_RGB:
m_RedChannelPrecision = bitDepth;
m_RedData = new unsigned char[numPixels];
m_GreenChannelPrecision = bitDepth;
@ -175,23 +173,23 @@ bool ImageLoaderPNG::ReadData() {
m_BlueData = new unsigned char[numPixels];
for(uint32 i = 0; i < m_Height; i++) {
png_read_row(png_ptr, rowData, NULL);
png_read_row(png_ptr, rowData, NULL);
unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++];
m_GreenData[rowOffset + j] = rowData[byteIdx++];
m_BlueData[rowOffset + j] = rowData[byteIdx++];
}
unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++];
m_GreenData[rowOffset + j] = rowData[byteIdx++];
m_BlueData[rowOffset + j] = rowData[byteIdx++];
}
assert(byteIdx == bpr);
assert(byteIdx == bpr);
}
break;
case PNG_COLOR_TYPE_RGB_ALPHA:
case PNG_COLOR_TYPE_RGB_ALPHA:
m_RedChannelPrecision = bitDepth;
m_RedData = new unsigned char[numPixels];
m_GreenChannelPrecision = bitDepth;
@ -202,42 +200,42 @@ bool ImageLoaderPNG::ReadData() {
m_AlphaData = new unsigned char[numPixels];
for(uint32 i = 0; i < m_Height; i++) {
png_read_row(png_ptr, rowData, NULL);
png_read_row(png_ptr, rowData, NULL);
unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++];
m_GreenData[rowOffset + j] = rowData[byteIdx++];
m_BlueData[rowOffset + j] = rowData[byteIdx++];
m_AlphaData[rowOffset + j] = rowData[byteIdx++];
}
unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++];
m_GreenData[rowOffset + j] = rowData[byteIdx++];
m_BlueData[rowOffset + j] = rowData[byteIdx++];
m_AlphaData[rowOffset + j] = rowData[byteIdx++];
}
assert(byteIdx == bpr);
assert(byteIdx == bpr);
}
break;
case PNG_COLOR_TYPE_GRAY_ALPHA:
case PNG_COLOR_TYPE_GRAY_ALPHA:
m_RedChannelPrecision = bitDepth;
m_RedData = new unsigned char[numPixels];
m_AlphaChannelPrecision = bitDepth;
m_AlphaData = new unsigned char[numPixels];
for(uint32 i = 0; i < m_Height; i++) {
png_read_row(png_ptr, rowData, NULL);
png_read_row(png_ptr, rowData, NULL);
unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++];
m_AlphaData[rowOffset + j] = rowData[byteIdx++];
}
unsigned int rowOffset = i * m_Width;
unsigned int byteIdx = 0;
for(uint32 j = 0; j < m_Width; j++) {
m_RedData[rowOffset + j] = rowData[byteIdx++];
m_AlphaData[rowOffset + j] = rowData[byteIdx++];
}
assert(byteIdx == bpr);
assert(byteIdx == bpr);
}
break;
}

View file

@ -66,87 +66,87 @@ public:
ImageWriterPNG &writer = *(ImageWriterPNG *)(io_ptr);
// Grow the output buffer by doubling until the pending write fits.
// NOTE(review): if m_RawFileDataSz is ever 0, doubling never makes progress
// and this loop does not terminate -- confirm the initial size is non-zero.
while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) {
  uint8 *newData = new uint8[writer.m_RawFileDataSz << 1];
  memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz);
  writer.m_RawFileDataSz <<= 1;
  // The buffer is array-allocated (new uint8[] above), so it has to be
  // released with delete []; a scalar delete here is undefined behavior.
  delete [] writer.m_RawFileData;
  writer.m_RawFileData = newData;
}

// Destination of this write inside the (possibly regrown) buffer.
unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]);
memcpy(stream, outBytes, byteCountToWrite);
writer.m_StreamPosition += byteCountToWrite;
}
// Flush callback with the signature libpng expects; nothing is buffered on
// our side, so there is nothing to do.
static void FlushStream(png_structp png_ptr) { /* Do nothing... */ }
};
// Constructs a PNG writer from the width, height and raw data of `im`.
// The output stream position starts at zero.
ImageWriterPNG::ImageWriterPNG(const Image &im)
  : ImageWriter(im.GetWidth(), im.GetHeight(), im.RawData())
  , m_StreamPosition(0)
{
}
bool ImageWriterPNG::WriteImage() {
png_structp png_ptr = NULL;
png_infop info_ptr = NULL;
png_byte ** row_pointers = NULL;
int pixel_size = 4;
int depth = 8;
png_structp png_ptr = NULL;
png_infop info_ptr = NULL;
png_byte ** row_pointers = NULL;
int pixel_size = 4;
int depth = 8;
png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (png_ptr == NULL) {
return false;
}
png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (png_ptr == NULL) {
return false;
}
info_ptr = png_create_info_struct (png_ptr);
if (info_ptr == NULL) {
png_destroy_write_struct (&png_ptr, &info_ptr);
return false;
}
info_ptr = png_create_info_struct (png_ptr);
if (info_ptr == NULL) {
png_destroy_write_struct (&png_ptr, &info_ptr);
return false;
}
/* Set image attributes. */
/* Set image attributes. */
png_set_IHDR (png_ptr,
info_ptr,
m_Width,
m_Height,
depth,
PNG_COLOR_TYPE_RGBA,
PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_DEFAULT,
PNG_FILTER_TYPE_DEFAULT);
png_set_IHDR (png_ptr,
info_ptr,
m_Width,
m_Height,
depth,
PNG_COLOR_TYPE_RGBA,
PNG_INTERLACE_NONE,
PNG_COMPRESSION_TYPE_DEFAULT,
PNG_FILTER_TYPE_DEFAULT);
/* Initialize rows of PNG. */
/* Initialize rows of PNG. */
row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *));
for (uint32 y = 0; y < m_Height; ++y) {
png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size);
row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *));
for (uint32 y = 0; y < m_Height; ++y) {
png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size);
row_pointers[y] = row;
row_pointers[y] = row;
for (uint32 x = 0; x < m_Width; ++x) {
for(uint32 ch = 0; ch < 4; ch++) {
*row++ = GetChannelForPixel(x, y, ch);
}
for (uint32 x = 0; x < m_Width; ++x) {
for(uint32 ch = 0; ch < 4; ch++) {
*row++ = GetChannelForPixel(x, y, ch);
}
}
}
}
png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream);
png_set_rows (png_ptr, info_ptr, row_pointers);
png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream);
png_set_rows (png_ptr, info_ptr, row_pointers);
png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
for (uint32 y = 0; y < m_Height; y++) {
png_free (png_ptr, row_pointers[y]);
}
png_free (png_ptr, row_pointers);
for (uint32 y = 0; y < m_Height; y++) {
png_free (png_ptr, row_pointers[y]);
}
png_free (png_ptr, row_pointers);
png_destroy_write_struct (&png_ptr, &info_ptr);
png_destroy_write_struct (&png_ptr, &info_ptr);
m_RawFileDataSz = m_StreamPosition;
return true;
m_RawFileDataSz = m_StreamPosition;
return true;
}

View file

@ -55,8 +55,8 @@ class ImageWriterPNG : public ImageWriter {
virtual bool WriteImage();
private:
uint32 m_StreamPosition;
friend class PNGStreamWriter;
uint32 m_StreamPosition;
friend class PNGStreamWriter;
};
#endif // _IMAGE_LOADER_H_