mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-09 23:45:38 +00:00
Get rid of evil tabs once and forever (from cpp/h files)
This commit is contained in:
parent
af2318027b
commit
03a7934644
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -89,22 +89,22 @@ static T max(const T &a, const T &b) {
|
||||||
|
|
||||||
// Value of pi as a double-precision constant.
static const double kPi = 3.141592653589793238462643383279502884197;

// Lookup table in which entry i holds the float value i.0f for every byte
// value 0..255. Presumably this exists so that callers can turn a byte into a
// float with a table read instead of an int-to-float conversion -- TODO confirm.
static const float kFloatConversion[256] = {
  0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
  16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f,
  32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f,
  48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f,
  64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f,
  80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f,
  96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f,
  112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f,
  128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f,
  144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f,
  160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f,
  176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f,
  192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f,
  208.0f, 209.0f, 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f, 216.0f, 217.0f, 218.0f, 219.0f, 220.0f, 221.0f, 222.0f, 223.0f,
  224.0f, 225.0f, 226.0f, 227.0f, 228.0f, 229.0f, 230.0f, 231.0f, 232.0f, 233.0f, 234.0f, 235.0f, 236.0f, 237.0f, 238.0f, 239.0f,
  240.0f, 241.0f, 242.0f, 243.0f, 244.0f, 245.0f, 246.0f, 247.0f, 248.0f, 249.0f, 250.0f, 251.0f, 252.0f, 253.0f, 254.0f, 255.0f
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -115,41 +115,41 @@ static const float kFloatConversion[256] = {
|
||||||
// Returns the number of set bits in the 8-bit mask n (0 for n == 0).
//
// All build configurations share this portable implementation. The 32-bit
// inline-assembly variants that used to live here computed
// 8 - (index of the lowest set bit), which equals the bit count only for
// masks whose set bits run contiguously down from bit 7. They also executed
// BSF on a zero input (destination register undefined), and the MSVC variant
// fell off the end of the function without a return statement. For the
// contiguous-high-bit masks the old assembly was valid for, this function
// returns the same value.
static inline uint32 CountBitsInMask(uint8 n) {
  uint32 count = 0;

  // Each pass clears the lowest set bit, so the loop runs once per set bit.
  while(n) {
    n &= n - 1;
    count++;
  }

  return count;
}
|
||||||
|
|
||||||
// Restricts x, in place, to the closed range [min, max]: a value below min
// becomes min, a value above max becomes max, anything else is left untouched.
// The lower bound is checked first, so it wins if min > max.
template <typename ty>
static inline void clamp(ty &x, const ty &min, const ty &max) {
  if(x < min) {
    x = min;
  } else if(x > max) {
    x = max;
  }
}
|
||||||
|
|
||||||
// absolute distance. It turns out the compiler does a much
|
// absolute distance. It turns out the compiler does a much
|
||||||
|
@ -157,23 +157,23 @@ static inline void clamp(ty &x, const ty &min, const ty &max) {
|
||||||
// translate the values to/from registers
|
// translate the values to/from registers
|
||||||
// Absolute difference |a - b| of two byte values.
// Only the plain C++ form is kept; the inline-assembly variant was already
// compiled out with #if 0.
static uint8 sad(uint8 a, uint8 b) {
  if(a > b) {
    return a - b;
  }
  return b - a;
}
|
||||||
|
@ -186,55 +186,55 @@ done:
|
||||||
|
|
||||||
// Quantizes one 8-bit channel value to the precision described by mask and
// returns whichever of the two neighbouring representable values is closer
// to val.
//
//   val  - the channel value to quantize.
//   mask - bit mask of the bits that survive quantization. The assert below
//          requires the cleared bits to form a contiguous low-order run.
//   pBit - when >= 0, an extra precision bit is appended below the mask and
//          set to !!pBit; when negative, no extra bit is used.
//
// Special cases: a full mask (0xFF) returns val unchanged, and an empty mask
// (0x00) returns 0xFF (the original comment says this matters mostly for alpha).
uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit) {

  switch(mask) {
    case 0xFF: return val;   // every bit kept: nothing to quantize
    case 0x00: return 0xFF;  // no bits kept: treat the channel as fully set
    default: break;
  }

  uint32 bits = CountBitsInMask(mask);
  const uint32 step = 1 << (8 - bits);

  assert(step-1 == uint8(~mask));

  // Candidate just below (lo) and just above (hi) val at this precision.
  uint32 lo = val & mask;
  uint32 hi = lo + step;

  if(pBit >= 0) {
    // The p-bit adds one bit of precision directly below the mask.
    bits++;
    const uint32 pBitValue = !!(pBit) << (8 - bits);
    lo |= pBitValue;
    hi |= pBitValue;
  }

  // Forcing the p-bit on can push lo above val; step both candidates down.
  if(lo > val) {
    lo -= step;
    hi -= step;
  }

  // Replicate the high bits into the low bits to expand back to 8 bits.
  lo |= lo >> bits;
  hi |= hi >> bits;

  return (sad(val, lo) < sad(val, hi))? lo : hi;
}
|
||||||
|
|
||||||
// Packs this vector into a 32-bit pixel with r in bits 0-7, g in bits 8-15,
// b in bits 16-23 and a in bits 24-31.
//
//   channelMask - per-channel quantization masks in the same byte layout;
//                 each byte is passed to QuantizeChannel for its channel.
//   pBit        - forwarded unchanged to QuantizeChannel for every channel.
//
// Each component is rounded by adding 0.5 and truncating, then reduced to its
// low 8 bits before quantization.
// NOTE(review): components are assumed to lie in [0, 255] here; converting a
// negative float to uint32 is not well defined -- confirm callers clamp first.
uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {

  // Hold the quantized channels as uint32 so that the shifts below are done
  // in unsigned arithmetic. Shifting a uint8 promotes it to signed int, and
  // "<< 24" of any value >= 0x80 then shifts into the sign bit.
  const uint32 red   = QuantizeChannel(uint32(r + 0.5) & 0xFF, channelMask & 0xFF, pBit);
  const uint32 green = QuantizeChannel(uint32(g + 0.5) & 0xFF, (channelMask >> 8) & 0xFF, pBit);
  const uint32 blue  = QuantizeChannel(uint32(b + 0.5) & 0xFF, (channelMask >> 16) & 0xFF, pBit);
  const uint32 alpha = QuantizeChannel(uint32(a + 0.5) & 0xFF, (channelMask >> 24) & 0xFF, pBit);

  return red | (green << 8) | (blue << 16) | (alpha << 24);
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -244,85 +244,85 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// In-place matrix product: replaces *this with (*this) * mat and returns a
// reference to *this.
RGBAMatrix &RGBAMatrix::operator *=(const RGBAMatrix &mat) {
  const RGBAMatrix product = (*this) * mat;
  *this = product;
  return *this;
}
|
||||||
|
|
||||||
// Matrix product of two 4x4 matrices stored row-major in m[16]:
// product(row, col) = sum over inner of m[row*4 + inner] * mat.m[inner*4 + col].
RGBAMatrix RGBAMatrix::operator *(const RGBAMatrix &mat) const {

  RGBAMatrix product;

  for(int row = 0; row < 4; row++) {
    const int rowBase = row * 4;

    for(int col = 0; col < 4; col++) {

      // Clear the cell first, then accumulate the dot product of this
      // matrix's row with mat's column.
      product(row, col) = 0.0f;
      for(int inner = 0; inner < 4; inner++) {
        product(row, col) += m[rowBase + inner] * mat.m[inner*4 + col];
      }
    }
  }

  return product;
}
|
||||||
|
|
||||||
// Matrix-vector product: each output component is the dot product of one
// matrix row (m1..m4, m5..m8, m9..m12, m13..m16) with p.
RGBAVector RGBAMatrix::operator *(const RGBAVector &p) const {
  const float outX = p.x * m1 + p.y * m2 + p.z * m3 + p.w * m4;
  const float outY = p.x * m5 + p.y * m6 + p.z * m7 + p.w * m8;
  const float outZ = p.x * m9 + p.y * m10 + p.z * m11 + p.w * m12;
  const float outW = p.x * m13 + p.y * m14 + p.z * m15 + p.w * m16;

  return RGBAVector(outX, outY, outZ, outW);
}
|
||||||
|
|
||||||
// Builds a rotation by rad radians in the plane of the second and third
// components: m6 = m11 = cos(rad), m10 = sin(rad), m7 = -sin(rad).
// All other entries keep whatever the RGBAMatrix default constructor sets
// (presumably identity -- TODO confirm).
RGBAMatrix RGBAMatrix::RotateX(float rad) {
  RGBAMatrix rot;

  rot.m11 = cos(rad);
  rot.m6 = rot.m11;

  rot.m10 = sin(rad);
  rot.m7 = -rot.m10;

  return rot;
}
|
||||||
|
|
||||||
// Builds a rotation by rad radians in the plane of the first and third
// components: m1 = m11 = cos(rad), m3 = sin(rad), m9 = -sin(rad).
// All other entries keep whatever the RGBAMatrix default constructor sets
// (presumably identity -- TODO confirm).
RGBAMatrix RGBAMatrix::RotateY(float rad) {
  RGBAMatrix rot;

  rot.m11 = cos(rad);
  rot.m1 = rot.m11;

  rot.m3 = sin(rad);
  rot.m9 = -rot.m3;

  return rot;
}
|
||||||
|
|
||||||
// Builds a rotation by rad radians in the plane of the first and second
// components: m1 = m6 = cos(rad), m5 = sin(rad), m2 = -sin(rad).
// All other entries keep whatever the RGBAMatrix default constructor sets
// (presumably identity -- TODO confirm).
RGBAMatrix RGBAMatrix::RotateZ(float rad) {
  RGBAMatrix rot;

  rot.m6 = cos(rad);
  rot.m1 = rot.m6;

  rot.m5 = sin(rad);
  rot.m2 = -rot.m5;

  return rot;
}
|
||||||
|
|
||||||
// Builds a matrix whose last column (m4, m8, m12, m16) holds the four
// components of t. All other entries keep whatever the RGBAMatrix default
// constructor sets (presumably identity -- TODO confirm).
// Note that m16 is overwritten with t.w as well.
RGBAMatrix RGBAMatrix::Translate(const RGBAVector &t) {
  RGBAMatrix xlate;

  xlate.m4  = t.x;
  xlate.m8  = t.y;
  xlate.m12 = t.z;
  xlate.m16 = t.w;

  return xlate;
}
|
||||||
|
|
||||||
bool RGBAMatrix::Identity() {
|
bool RGBAMatrix::Identity() {
|
||||||
for(int i = 0; i < 4; i++) {
|
for(int i = 0; i < 4; i++) {
|
||||||
for(int j = 0; j < 4; j++) {
|
for(int j = 0; j < 4; j++) {
|
||||||
|
|
||||||
if(i == j) {
|
if(i == j) {
|
||||||
if(fabs(m[i*4 + j] - 1.0f) > 1e-5)
|
if(fabs(m[i*4 + j] - 1.0f) > 1e-5)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if(fabs(m[i*4 + j]) > 1e-5)
|
if(fabs(m[i*4 + j]) > 1e-5)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -332,45 +332,45 @@ bool RGBAMatrix::Identity() {
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// Merging constructor: starts as a copy of left, then appends every data
// point of right through AddPoint. The cached principal axis is invalidated
// because the point set has changed.
RGBACluster::RGBACluster(const RGBACluster &left, const RGBACluster &right) {
  *this = left;

  uint32 idx = 0;
  while(idx < right.m_NumPoints) {
    AddPoint(right.m_DataPoints[idx]);
    idx++;
  }

  m_PrincipalAxisCached = false;
}
|
||||||
|
|
||||||
void RGBACluster::AddPoint(const RGBAVector &p) {
|
void RGBACluster::AddPoint(const RGBAVector &p) {
|
||||||
assert(m_NumPoints < kMaxNumDataPoints);
|
assert(m_NumPoints < kMaxNumDataPoints);
|
||||||
m_Total += p;
|
m_Total += p;
|
||||||
m_DataPoints[m_NumPoints++] = p;
|
m_DataPoints[m_NumPoints++] = p;
|
||||||
m_PointBitString |= 1 << p.GetIdx();
|
m_PointBitString |= 1 << p.GetIdx();
|
||||||
|
|
||||||
for(uint32 i = 0; i < kNumColorChannels; i++) {
|
for(uint32 i = 0; i < kNumColorChannels; i++) {
|
||||||
m_Min.c[i] = min(p.c[i], m_Min.c[i]);
|
m_Min.c[i] = min(p.c[i], m_Min.c[i]);
|
||||||
m_Max.c[i] = max(p.c[i], m_Max.c[i]);
|
m_Max.c[i] = max(p.c[i], m_Max.c[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Writes the cluster's principal axis to axis.
// The axis is computed on the first call via the free function
// ::GetPrincipalAxis, which also fills m_PrincipalEigenvalue and
// m_SecondEigenvalue and reports its iteration count (stored in
// m_PowerMethodIterations). Later calls return the cached result until
// m_PrincipalAxisCached is cleared.
void RGBACluster::GetPrincipalAxis(RGBADir &axis) {

  if(!m_PrincipalAxisCached) {
    m_PowerMethodIterations = ::GetPrincipalAxis(
      m_NumPoints,
      m_DataPoints,
      m_PrincipalAxis,
      m_PrincipalEigenvalue,
      &m_SecondEigenvalue
    );
    m_PrincipalAxisCached = true;
  }

  axis = m_PrincipalAxis;
}
|
||||||
|
|
||||||
double RGBACluster::GetPrincipalEigenvalue() {
|
double RGBACluster::GetPrincipalEigenvalue() {
|
||||||
|
@ -408,74 +408,74 @@ uint32 RGBACluster::GetPowerMethodIterations() {
|
||||||
|
|
||||||
double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2], int *indices) const {
|
double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2], int *indices) const {
|
||||||
|
|
||||||
// nBuckets should be a power of two.
|
// nBuckets should be a power of two.
|
||||||
assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
|
assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
|
||||||
|
|
||||||
const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1));
|
const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1));
|
||||||
|
|
||||||
typedef uint32 tInterpPair[2];
|
typedef uint32 tInterpPair[2];
|
||||||
typedef tInterpPair tInterpLevel[16];
|
typedef tInterpPair tInterpLevel[16];
|
||||||
const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1);
|
const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1);
|
||||||
|
|
||||||
assert(indexPrec >= 2 && indexPrec <= 4);
|
assert(indexPrec >= 2 && indexPrec <= 4);
|
||||||
|
|
||||||
uint32 qp1, qp2;
|
uint32 qp1, qp2;
|
||||||
if(pbits) {
|
if(pbits) {
|
||||||
qp1 = p1.ToPixel(bitMask, pbits[0]);
|
qp1 = p1.ToPixel(bitMask, pbits[0]);
|
||||||
qp2 = p2.ToPixel(bitMask, pbits[1]);
|
qp2 = p2.ToPixel(bitMask, pbits[1]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
qp1 = p1.ToPixel(bitMask);
|
qp1 = p1.ToPixel(bitMask);
|
||||||
qp2 = p2.ToPixel(bitMask);
|
qp2 = p2.ToPixel(bitMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8 *pqp1 = (uint8 *)&qp1;
|
uint8 *pqp1 = (uint8 *)&qp1;
|
||||||
uint8 *pqp2 = (uint8 *)&qp2;
|
uint8 *pqp2 = (uint8 *)&qp2;
|
||||||
|
|
||||||
const RGBAVector metric = errorMetricVec;
|
const RGBAVector metric = errorMetricVec;
|
||||||
|
|
||||||
float totalError = 0.0;
|
float totalError = 0.0;
|
||||||
for(uint32 i = 0; i < m_NumPoints; i++) {
|
for(uint32 i = 0; i < m_NumPoints; i++) {
|
||||||
|
|
||||||
const uint32 pixel = m_DataPoints[i].ToPixel();
|
const uint32 pixel = m_DataPoints[i].ToPixel();
|
||||||
const uint8 *pb = (const uint8 *)(&pixel);
|
const uint8 *pb = (const uint8 *)(&pixel);
|
||||||
|
|
||||||
float minError = FLT_MAX;
|
float minError = FLT_MAX;
|
||||||
int bestBucket = -1;
|
int bestBucket = -1;
|
||||||
for(int j = 0; j < nBuckets; j++) {
|
for(int j = 0; j < nBuckets; j++) {
|
||||||
|
|
||||||
uint32 interp0 = (*interpVals)[j][0];
|
uint32 interp0 = (*interpVals)[j][0];
|
||||||
uint32 interp1 = (*interpVals)[j][1];
|
uint32 interp1 = (*interpVals)[j][1];
|
||||||
|
|
||||||
RGBAVector errorVec (0.0f);
|
RGBAVector errorVec (0.0f);
|
||||||
for(uint32 k = 0; k < kNumColorChannels; k++) {
|
for(uint32 k = 0; k < kNumColorChannels; k++) {
|
||||||
const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF;
|
const uint8 ip = (((uint32(pqp1[k]) * interp0) + (uint32(pqp2[k]) * interp1) + 32) >> 6) & 0xFF;
|
||||||
const uint8 dist = sad(pb[k], ip);
|
const uint8 dist = sad(pb[k], ip);
|
||||||
errorVec.c[k] = kFloatConversion[dist] * metric.c[k];
|
errorVec.c[k] = kFloatConversion[dist] * metric.c[k];
|
||||||
}
|
}
|
||||||
|
|
||||||
float error = errorVec * errorVec;
|
float error = errorVec * errorVec;
|
||||||
if(error < minError) {
|
if(error < minError) {
|
||||||
minError = error;
|
minError = error;
|
||||||
bestBucket = j;
|
bestBucket = j;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
|
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
|
||||||
// farther away from the reference point along the line). Hence we can early out here.
|
// farther away from the reference point along the line). Hence we can early out here.
|
||||||
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
|
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
|
||||||
// about 0.01 RMS error.
|
// about 0.01 RMS error.
|
||||||
else if(error > minError) {
|
else if(error > minError) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
totalError += minError;
|
totalError += minError;
|
||||||
|
|
||||||
assert(bestBucket >= 0);
|
assert(bestBucket >= 0);
|
||||||
if(indices) indices[i] = bestBucket;
|
if(indices) indices[i] = bestBucket;
|
||||||
}
|
}
|
||||||
|
|
||||||
return totalError;
|
return totalError;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -485,175 +485,174 @@ double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, u
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// Clamps every component (r, g, b, a) of both endpoints to [0, 255] in place.
void ClampEndpoints(RGBAVector &p1, RGBAVector &p2) {
  // c[0..3] aliases r, g, b, a through the union in RGBAVector.
  for(int ch = 0; ch < 4; ch++) {
    clamp(p1.c[ch], 0.0f, 255.0f);
  }

  for(int ch = 0; ch < 4; ch++) {
    clamp(p2.c[ch], 0.0f, 255.0f);
  }
}
|
||||||
|
|
||||||
// Estimates the dominant eigenvector and eigenvalue of mat with the power
// method.
//
//   mat    - the 4x4 matrix to analyse.
//   eigVec - receives the last normalized iterate.
//   eigVal - receives the length of mat * b from the last iteration, or 0.0
//            if mat * b collapsed to (near) zero or no attempt converged.
//
// Up to three attempts are made, each from a fresh rand()-seeded start
// vector and capped at kMaxNumIterations iterations. Returns the iteration
// count of the final attempt.
static uint32 PowerIteration(const RGBAMatrix &mat, RGBADir &eigVec, double &eigVal) {

  const int kMaxNumIterations = 200;
  const int kMaxNumTries = 3;

  int numIterations = 0;

  for(int attempt = 0; attempt < kMaxNumTries; attempt++) {
    // !SPEED! The matrix is only 4x4, so the power method is cheap and
    // lends itself to SIMD.
    RGBAVector b = RGBAVector(float(rand()) + 1.0f);
    b /= b.Length();

    bool converged = false;
    numIterations = 0;
    for(;;) {
      // Same test as "while(!converged && ++numIterations < max)": the
      // counter is only advanced while we have not converged.
      if(converged) break;
      if(++numIterations >= kMaxNumIterations) break;

      RGBAVector next = mat * b;

      // !HACK! If the iterate collapses to zero, the points lie equally
      // spaced on a sphere in this space. In this (extremely rare) case just
      // use the current vector as the principal direction.
      const float nextLen = next.Length();
      if(nextLen < 1e-10) {
        eigVec = b;
        eigVal = 0.0;
        return numIterations;
      }

      eigVal = next.Length();
      next /= float(eigVal);

      // Converged once successive unit iterates are (almost) parallel.
      converged = fabs(1.0f - (b * next)) < 1e-5;

      b = next;
    }

    eigVec = b;
    if(numIterations < kMaxNumIterations) {
      break;
    }
  }

  // Every attempt ran out of iterations: report a zero eigenvalue.
  if(numIterations == kMaxNumIterations) {
    eigVal = 0.0;
  }

  return numIterations;
}
|
||||||
|
|
||||||
// Computes the principal axis of a set of points.
//
//   nPts   - number of points in pts; at most kMaxNumDataPoints (asserted).
//   pts    - the input points.
//   axis   - receives the principal direction: zero if all points are equal,
//            the direction between the first two distinct points if all
//            distinct points are collinear, otherwise the dominant
//            eigenvector of the covariance matrix.
//   eigOne - receives the dominant eigenvalue. NOT written on the two early
//            returns (single distinct point, collinear points).
//   eigTwo - optional; when non-NULL receives an estimate of the second
//            eigenvalue (0.0 when eigOne is 0 or the deflated matrix is
//            numerically zero). NOT written on the two early returns.
//
// Returns the total number of power-method iterations used, or 0 on the
// early returns.
uint32 GetPrincipalAxis(uint32 nPts, const RGBAVector *pts, RGBADir &axis, double &eigOne, double *eigTwo) {

  assert(nPts <= kMaxNumDataPoints);

  RGBAVector avg (0.0f);
  for(uint32 i = 0; i < nPts; i++) {
    avg += pts[i];
  }
  avg /= float(nPts);

  // Offsets of every point from the mean; used for the covariance matrix.
  RGBAVector toPts[kMaxNumDataPoints];
  for(uint32 i = 0; i < nPts; i++) {
    toPts[i] = pts[i] - avg;
  }

  // Generate a list of unique points...
  RGBAVector upts[kMaxNumDataPoints];
  uint32 uptsIdx = 0;
  for(uint32 i = 0; i < nPts; i++) {

    bool hasPt = false;
    for(uint32 j = 0; j < uptsIdx && !hasPt; j++) {
      hasPt = (upts[j] == pts[i]);
    }

    if(!hasPt) {
      upts[uptsIdx++] = pts[i];
    }
  }

  assert(uptsIdx > 0);

  if(uptsIdx == 1) {
    // A single distinct point has no direction.
    axis.r = axis.g = axis.b = axis.a = 0.0f;
    return 0;
  }

  // Collinear?
  {
    RGBADir dir (upts[1] - upts[0]);
    bool collinear = true;

    // Only the first uptsIdx entries of upts hold real points. Iterating up
    // to nPts would also test default-constructed slots whenever the input
    // contains duplicates and wrongly reject collinear sets.
    for(uint32 i = 2; i < uptsIdx; i++) {
      RGBAVector v = (upts[i] - upts[0]);
      if(fabs(fabs(v*dir) - v.Length()) > 1e-7) {
        collinear = false;
        break;
      }
    }

    if(collinear) {
      axis = dir;
      return 0;
    }
  }

  RGBAMatrix covMatrix;

  // Compute covariance (lower triangle, mirrored into the upper triangle).
  for(uint32 i = 0; i < kNumColorChannels; i++) {
    for(uint32 j = 0; j <= i; j++) {

      float sum = 0.0;
      for(uint32 k = 0; k < nPts; k++) {
        sum += toPts[k].c[i] * toPts[k].c[j];
      }

      // NOTE(review): the divisor is (kNumColorChannels - 1), not
      // (nPts - 1). This only rescales the eigenvalues and leaves the
      // eigenvectors unchanged; kept as is -- confirm whether it is intended.
      covMatrix(i, j) = sum / kFloatConversion[kNumColorChannels - 1];
      covMatrix(j, i) = covMatrix(i, j);
    }
  }

  uint32 iters = PowerIteration(covMatrix, axis, eigOne);

  if(NULL != eigTwo) {
    if(eigOne != 0.0) {
      // Deflate: subtract eigOne * (axis * axis^T) so that the dominant
      // eigenvalue of what remains is the second eigenvalue.
      RGBAMatrix reduced = covMatrix - eigOne * RGBAMatrix(
        axis.c[0] * axis.c[0], axis.c[0] * axis.c[1], axis.c[0] * axis.c[2], axis.c[0] * axis.c[3],
        axis.c[1] * axis.c[0], axis.c[1] * axis.c[1], axis.c[1] * axis.c[2], axis.c[1] * axis.c[3],
        axis.c[2] * axis.c[0], axis.c[2] * axis.c[1], axis.c[2] * axis.c[2], axis.c[2] * axis.c[3],
        axis.c[3] * axis.c[0], axis.c[3] * axis.c[1], axis.c[3] * axis.c[2], axis.c[3] * axis.c[3]
      );

      bool allZero = true;
      for(uint32 i = 0; i < 16; i++) {
        if(fabs(reduced[i]) > 0.0005) {
          allZero = false;
        }
      }

      if(allZero) {
        *eigTwo = 0.0;
      }
      else {
        RGBADir dummyDir;
        iters += PowerIteration(reduced, dummyDir, *eigTwo);
      }
    }
    else {
      *eigTwo = 0.0;
    }
  }

  return iters;
}
|
||||||
|
|
|
@ -78,260 +78,260 @@ static const uint32 kMaxNumDataPoints = 16;
|
||||||
class RGBAVector {
|
class RGBAVector {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
union {
|
union {
|
||||||
struct { float r, g, b, a; };
|
struct { float r, g, b, a; };
|
||||||
struct { float x, y, z, w; };
|
struct { float x, y, z, w; };
|
||||||
float c[4];
|
float c[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 GetIdx() const { return idx; }
|
uint32 GetIdx() const { return idx; }
|
||||||
|
|
||||||
RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
|
RGBAVector() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
|
||||||
RGBAVector(uint32 _idx, uint32 pixel) :
|
RGBAVector(uint32 _idx, uint32 pixel) :
|
||||||
r(float(pixel & 0xFF)),
|
r(float(pixel & 0xFF)),
|
||||||
g(float((pixel >> 8) & 0xFF)),
|
g(float((pixel >> 8) & 0xFF)),
|
||||||
b(float((pixel >> 16) & 0xFF)),
|
b(float((pixel >> 16) & 0xFF)),
|
||||||
a(float((pixel >> 24) & 0xFF)),
|
a(float((pixel >> 24) & 0xFF)),
|
||||||
idx(_idx)
|
idx(_idx)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
RGBAVector(float _r, float _g, float _b, float _a) :
|
RGBAVector(float _r, float _g, float _b, float _a) :
|
||||||
r(_r), g(_g), b(_b), a(_a), idx(0) { }
|
r(_r), g(_g), b(_b), a(_a), idx(0) { }
|
||||||
|
|
||||||
explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { }
|
explicit RGBAVector(float cc) : r(cc), g(cc), b(cc), a(cc), idx(0) { }
|
||||||
|
|
||||||
RGBAVector &operator =(const RGBAVector &other) {
|
RGBAVector &operator =(const RGBAVector &other) {
|
||||||
this->idx = other.idx;
|
this->idx = other.idx;
|
||||||
memcpy(c, other.c, sizeof(c));
|
memcpy(c, other.c, sizeof(c));
|
||||||
return (*this);
|
return (*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector operator +(const RGBAVector &p) const {
|
RGBAVector operator +(const RGBAVector &p) const {
|
||||||
return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a);
|
return RGBAVector(r + p.r, g + p.g, b + p.b, a + p.a);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector &operator +=(const RGBAVector &p) {
|
RGBAVector &operator +=(const RGBAVector &p) {
|
||||||
r += p.r; g += p.g; b += p.b; a += p.a;
|
r += p.r; g += p.g; b += p.b; a += p.a;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector operator -(const RGBAVector &p) const {
|
RGBAVector operator -(const RGBAVector &p) const {
|
||||||
return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a);
|
return RGBAVector(r - p.r, g - p.g, b - p.b, a - p.a);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector &operator -=(const RGBAVector &p) {
|
RGBAVector &operator -=(const RGBAVector &p) {
|
||||||
r -= p.r; g -= p.g; b -= p.b; a -= p.a;
|
r -= p.r; g -= p.g; b -= p.b; a -= p.a;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector operator /(const float s) const {
|
RGBAVector operator /(const float s) const {
|
||||||
return RGBAVector(r / s, g / s, b / s, a / s);
|
return RGBAVector(r / s, g / s, b / s, a / s);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector &operator /=(const float s) {
|
RGBAVector &operator /=(const float s) {
|
||||||
r /= s; g /= s; b /= s; a /= s;
|
r /= s; g /= s; b /= s; a /= s;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
float operator *(const RGBAVector &p) const {
|
float operator *(const RGBAVector &p) const {
|
||||||
return r * p.r + g * p.g + b * p.b + a * p.a;
|
return r * p.r + g * p.g + b * p.b + a * p.a;
|
||||||
}
|
}
|
||||||
|
|
||||||
float Length() const {
|
float Length() const {
|
||||||
return sqrt((*this) * (*this));
|
return sqrt((*this) * (*this));
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector &operator *=(const RGBAVector &v) {
|
RGBAVector &operator *=(const RGBAVector &v) {
|
||||||
r *= v.r; g *= v.g; b *= v.b; a *= v.a;
|
r *= v.r; g *= v.g; b *= v.b; a *= v.a;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector operator *(const float s) const {
|
RGBAVector operator *(const float s) const {
|
||||||
return RGBAVector(r * s, g * s, b * s, a * s);
|
return RGBAVector(r * s, g * s, b * s, a * s);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend RGBAVector operator *(const float s, const RGBAVector &p) {
|
friend RGBAVector operator *(const float s, const RGBAVector &p) {
|
||||||
return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s);
|
return RGBAVector(p.r * s, p.g * s, p.b * s, p.a * s);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector &operator *=(const float s) {
|
RGBAVector &operator *=(const float s) {
|
||||||
r *= s; g *= s; b *= s; a *= s;
|
r *= s; g *= s; b *= s; a *= s;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
float &operator [](const int i) {
|
float &operator [](const int i) {
|
||||||
return c[i];
|
return c[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) {
|
friend bool operator ==(const RGBAVector &rhs, const RGBAVector &lhs) {
|
||||||
const RGBAVector d = rhs - lhs;
|
const RGBAVector d = rhs - lhs;
|
||||||
return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7;
|
return fabs(d.r) < 1e-7 && fabs(d.g) < 1e-7 && fabs(d.b) < 1e-7 && fabs(d.a) < 1e-7;
|
||||||
}
|
}
|
||||||
|
|
||||||
friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) {
|
friend bool operator !=(const RGBAVector &rhs, const RGBAVector &lhs) {
|
||||||
return !(rhs == lhs);
|
return !(rhs == lhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
operator float *() {
|
operator float *() {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector Cross(const RGBAVector &rhs) {
|
RGBAVector Cross(const RGBAVector &rhs) {
|
||||||
return RGBAVector(
|
return RGBAVector(
|
||||||
rhs.y * z - y * rhs.z,
|
rhs.y * z - y * rhs.z,
|
||||||
rhs.z * x - z * rhs.x,
|
rhs.z * x - z * rhs.x,
|
||||||
rhs.x * y - x * rhs.y,
|
rhs.x * y - x * rhs.y,
|
||||||
1.0f
|
1.0f
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quantize this point.
|
// Quantize this point.
|
||||||
uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const;
|
uint32 ToPixel(const uint32 channelMask = 0xFFFFFFFF, const int pBit = -1) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint32 idx;
|
uint32 idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RGBAMatrix {
|
class RGBAMatrix {
|
||||||
private:
|
private:
|
||||||
union {
|
union {
|
||||||
float m[kNumColorChannels*kNumColorChannels];
|
float m[kNumColorChannels*kNumColorChannels];
|
||||||
struct {
|
struct {
|
||||||
float m1, m2, m3, m4;
|
float m1, m2, m3, m4;
|
||||||
float m5, m6, m7, m8;
|
float m5, m6, m7, m8;
|
||||||
float m9, m10, m11, m12;
|
float m9, m10, m11, m12;
|
||||||
float m13, m14, m15, m16;
|
float m13, m14, m15, m16;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
RGBAMatrix(const float *arr) {
|
RGBAMatrix(const float *arr) {
|
||||||
memcpy(m, arr, sizeof(m));
|
memcpy(m, arr, sizeof(m));
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
RGBAMatrix(
|
RGBAMatrix(
|
||||||
float _m1, float _m2, float _m3, float _m4,
|
float _m1, float _m2, float _m3, float _m4,
|
||||||
float _m5, float _m6, float _m7, float _m8,
|
float _m5, float _m6, float _m7, float _m8,
|
||||||
float _m9, float _m10, float _m11, float _m12,
|
float _m9, float _m10, float _m11, float _m12,
|
||||||
float _m13, float _m14, float _m15, float _m16
|
float _m13, float _m14, float _m15, float _m16
|
||||||
) :
|
) :
|
||||||
m1(_m1), m2(_m2), m3(_m3), m4(_m4),
|
m1(_m1), m2(_m2), m3(_m3), m4(_m4),
|
||||||
m5(_m5), m6(_m6), m7(_m7), m8(_m8),
|
m5(_m5), m6(_m6), m7(_m7), m8(_m8),
|
||||||
m9(_m9), m10(_m10), m11(_m11), m12(_m12),
|
m9(_m9), m10(_m10), m11(_m11), m12(_m12),
|
||||||
m13(_m13), m14(_m14), m15(_m15), m16(_m16)
|
m13(_m13), m14(_m14), m15(_m15), m16(_m16)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
RGBAMatrix() :
|
RGBAMatrix() :
|
||||||
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
|
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
|
||||||
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
|
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
|
||||||
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
|
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
|
||||||
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
|
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
RGBAMatrix &operator =(const RGBAMatrix &other) {
|
RGBAMatrix &operator =(const RGBAMatrix &other) {
|
||||||
memcpy(m, other.m, sizeof(m));
|
memcpy(m, other.m, sizeof(m));
|
||||||
return (*this);
|
return (*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix operator +(const RGBAMatrix &p) const {
|
RGBAMatrix operator +(const RGBAMatrix &p) const {
|
||||||
float newm[kNumColorChannels*kNumColorChannels];
|
float newm[kNumColorChannels*kNumColorChannels];
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i];
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] + p.m[i];
|
||||||
return RGBAMatrix(newm);
|
return RGBAMatrix(newm);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix &operator +=(const RGBAMatrix &p) {
|
RGBAMatrix &operator +=(const RGBAMatrix &p) {
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i];
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] += p.m[i];
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix operator -(const RGBAMatrix &p) const {
|
RGBAMatrix operator -(const RGBAMatrix &p) const {
|
||||||
float newm[kNumColorChannels*kNumColorChannels];
|
float newm[kNumColorChannels*kNumColorChannels];
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i];
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] - p.m[i];
|
||||||
return RGBAMatrix(newm);
|
return RGBAMatrix(newm);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix &operator -=(const RGBAMatrix &p) {
|
RGBAMatrix &operator -=(const RGBAMatrix &p) {
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i];
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] -= p.m[i];
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix operator /(const float s) const {
|
RGBAMatrix operator /(const float s) const {
|
||||||
float newm[kNumColorChannels*kNumColorChannels];
|
float newm[kNumColorChannels*kNumColorChannels];
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s;
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] / s;
|
||||||
return RGBAMatrix(newm);
|
return RGBAMatrix(newm);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix &operator /=(const float s) {
|
RGBAMatrix &operator /=(const float s) {
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s;
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] /= s;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix operator *(const float s) const {
|
RGBAMatrix operator *(const float s) const {
|
||||||
float newm[kNumColorChannels*kNumColorChannels];
|
float newm[kNumColorChannels*kNumColorChannels];
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s;
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = m[i] * s;
|
||||||
return RGBAMatrix(newm);
|
return RGBAMatrix(newm);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix operator *(const double s) const {
|
RGBAMatrix operator *(const double s) const {
|
||||||
float newm[kNumColorChannels*kNumColorChannels];
|
float newm[kNumColorChannels*kNumColorChannels];
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s);
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(m[i]) * s);
|
||||||
return RGBAMatrix(newm);
|
return RGBAMatrix(newm);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) {
|
friend RGBAMatrix operator *(const float s, const RGBAMatrix &p) {
|
||||||
float newm[kNumColorChannels*kNumColorChannels];
|
float newm[kNumColorChannels*kNumColorChannels];
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s;
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = p.m[i] * s;
|
||||||
return RGBAMatrix(newm);
|
return RGBAMatrix(newm);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) {
|
friend RGBAMatrix operator *(const double s, const RGBAMatrix &p) {
|
||||||
float newm[kNumColorChannels*kNumColorChannels];
|
float newm[kNumColorChannels*kNumColorChannels];
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s);
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) newm[i] = float(double(p.m[i]) * s);
|
||||||
return RGBAMatrix(newm);
|
return RGBAMatrix(newm);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrix &operator *=(const float s) {
|
RGBAMatrix &operator *=(const float s) {
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s;
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++) m[i] *= s;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
float &operator ()(const int i, const int j) {
|
float &operator ()(const int i, const int j) {
|
||||||
return (*this)[i*4 + j];
|
return (*this)[i*4 + j];
|
||||||
}
|
}
|
||||||
|
|
||||||
float &operator [](const int i) {
|
float &operator [](const int i) {
|
||||||
return m[i];
|
return m[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) {
|
friend bool operator ==(const RGBAMatrix &rhs, const RGBAMatrix &lhs) {
|
||||||
const RGBAMatrix d = rhs - lhs;
|
const RGBAMatrix d = rhs - lhs;
|
||||||
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++)
|
for(uint32 i = 0; i < kNumColorChannels*kNumColorChannels; i++)
|
||||||
if(d.m[i] > 1e-10)
|
if(d.m[i] > 1e-10)
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
operator float *() {
|
operator float *() {
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector operator *(const RGBAVector &p) const;
|
RGBAVector operator *(const RGBAVector &p) const;
|
||||||
RGBAMatrix operator *(const RGBAMatrix &mat) const;
|
RGBAMatrix operator *(const RGBAMatrix &mat) const;
|
||||||
RGBAMatrix &operator *=(const RGBAMatrix &mat);
|
RGBAMatrix &operator *=(const RGBAMatrix &mat);
|
||||||
static RGBAMatrix RotateX(float rad);
|
static RGBAMatrix RotateX(float rad);
|
||||||
static RGBAMatrix RotateY(float rad);
|
static RGBAMatrix RotateY(float rad);
|
||||||
static RGBAMatrix RotateZ(float rad);
|
static RGBAMatrix RotateZ(float rad);
|
||||||
static RGBAMatrix Translate(const RGBAVector &t);
|
static RGBAMatrix Translate(const RGBAVector &t);
|
||||||
bool Identity();
|
bool Identity();
|
||||||
};
|
};
|
||||||
|
|
||||||
class RGBADir : public RGBAVector {
|
class RGBADir : public RGBAVector {
|
||||||
public:
|
public:
|
||||||
RGBADir() : RGBAVector() { }
|
RGBADir() : RGBAVector() { }
|
||||||
RGBADir(const RGBAVector &p) : RGBAVector(p) {
|
RGBADir(const RGBAVector &p) : RGBAVector(p) {
|
||||||
*this /= Length();
|
*this /= Length();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Makes sure that the values of the endpoints lie between 0 and 1.
|
// Makes sure that the values of the endpoints lie between 0 and 1.
|
||||||
|
@ -340,83 +340,83 @@ extern void ClampEndpoints(RGBAVector &p1, RGBAVector &p2);
|
||||||
class RGBACluster {
|
class RGBACluster {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
RGBACluster() :
|
RGBACluster() :
|
||||||
m_NumPoints(0), m_Total(0),
|
m_NumPoints(0), m_Total(0),
|
||||||
m_PointBitString(0),
|
m_PointBitString(0),
|
||||||
m_Min(FLT_MAX),
|
m_Min(FLT_MAX),
|
||||||
m_Max(-FLT_MAX),
|
m_Max(-FLT_MAX),
|
||||||
m_PrincipalAxisCached(false)
|
m_PrincipalAxisCached(false)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
RGBACluster(const RGBACluster &c) :
|
RGBACluster(const RGBACluster &c) :
|
||||||
m_NumPoints(c.m_NumPoints),
|
m_NumPoints(c.m_NumPoints),
|
||||||
m_Total(c.m_Total),
|
m_Total(c.m_Total),
|
||||||
m_PointBitString(c.m_PointBitString),
|
m_PointBitString(c.m_PointBitString),
|
||||||
m_Min(c.m_Min),
|
m_Min(c.m_Min),
|
||||||
m_Max(c.m_Max),
|
m_Max(c.m_Max),
|
||||||
m_PrincipalAxisCached(c.m_PrincipalAxisCached),
|
m_PrincipalAxisCached(c.m_PrincipalAxisCached),
|
||||||
m_PrincipalEigenvalue(c.m_PrincipalEigenvalue),
|
m_PrincipalEigenvalue(c.m_PrincipalEigenvalue),
|
||||||
m_SecondEigenvalue(c.m_SecondEigenvalue),
|
m_SecondEigenvalue(c.m_SecondEigenvalue),
|
||||||
m_PowerMethodIterations(c.m_PowerMethodIterations),
|
m_PowerMethodIterations(c.m_PowerMethodIterations),
|
||||||
m_PrincipalAxis(c.m_PrincipalAxis)
|
m_PrincipalAxis(c.m_PrincipalAxis)
|
||||||
{
|
{
|
||||||
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector));
|
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVector));
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBACluster(const RGBACluster &left, const RGBACluster &right);
|
RGBACluster(const RGBACluster &left, const RGBACluster &right);
|
||||||
RGBACluster(const RGBAVector &p) :
|
RGBACluster(const RGBAVector &p) :
|
||||||
m_NumPoints(1),
|
m_NumPoints(1),
|
||||||
m_Total(p),
|
m_Total(p),
|
||||||
m_PointBitString(0),
|
m_PointBitString(0),
|
||||||
m_Min(p), m_Max(p),
|
m_Min(p), m_Max(p),
|
||||||
m_PrincipalAxisCached(false)
|
m_PrincipalAxisCached(false)
|
||||||
{
|
{
|
||||||
m_DataPoints[0] = p;
|
m_DataPoints[0] = p;
|
||||||
m_PointBitString |= (1 << p.GetIdx());
|
m_PointBitString |= (1 << p.GetIdx());
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVector GetTotal() const { return m_Total; }
|
RGBAVector GetTotal() const { return m_Total; }
|
||||||
const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; }
|
const RGBAVector &GetPoint(int idx) const { return m_DataPoints[idx]; }
|
||||||
uint32 GetNumPoints() const { return m_NumPoints; }
|
uint32 GetNumPoints() const { return m_NumPoints; }
|
||||||
RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); }
|
RGBAVector GetAvg() const { return m_Total / float(m_NumPoints); }
|
||||||
const RGBAVector *GetPoints() const { return m_DataPoints; }
|
const RGBAVector *GetPoints() const { return m_DataPoints; }
|
||||||
|
|
||||||
void AddPoint(const RGBAVector &p);
|
void AddPoint(const RGBAVector &p);
|
||||||
|
|
||||||
void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const {
|
void GetBoundingBox(RGBAVector &Min, RGBAVector &Max) const {
|
||||||
Min = m_Min, Max = m_Max;
|
Min = m_Min, Max = m_Max;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
|
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
|
||||||
double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const;
|
double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const;
|
||||||
|
|
||||||
// Returns the principal axis for this point cluster.
|
// Returns the principal axis for this point cluster.
|
||||||
double GetPrincipalEigenvalue();
|
double GetPrincipalEigenvalue();
|
||||||
double GetSecondEigenvalue();
|
double GetSecondEigenvalue();
|
||||||
uint32 GetPowerMethodIterations();
|
uint32 GetPowerMethodIterations();
|
||||||
void GetPrincipalAxis(RGBADir &axis);
|
void GetPrincipalAxis(RGBADir &axis);
|
||||||
|
|
||||||
bool AllSamePoint() const { return m_Max == m_Min; }
|
bool AllSamePoint() const { return m_Max == m_Min; }
|
||||||
int GetPointBitString() const { return m_PointBitString; }
|
int GetPointBitString() const { return m_PointBitString; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
// The number of points in the cluster.
|
// The number of points in the cluster.
|
||||||
uint32 m_NumPoints;
|
uint32 m_NumPoints;
|
||||||
|
|
||||||
RGBAVector m_Total;
|
RGBAVector m_Total;
|
||||||
|
|
||||||
// The points in the cluster.
|
// The points in the cluster.
|
||||||
RGBAVector m_DataPoints[kMaxNumDataPoints];
|
RGBAVector m_DataPoints[kMaxNumDataPoints];
|
||||||
|
|
||||||
int m_PointBitString;
|
int m_PointBitString;
|
||||||
RGBAVector m_Min, m_Max;
|
RGBAVector m_Min, m_Max;
|
||||||
|
|
||||||
bool m_PrincipalAxisCached;
|
bool m_PrincipalAxisCached;
|
||||||
double m_PrincipalEigenvalue;
|
double m_PrincipalEigenvalue;
|
||||||
double m_SecondEigenvalue;
|
double m_SecondEigenvalue;
|
||||||
uint32 m_PowerMethodIterations;
|
uint32 m_PowerMethodIterations;
|
||||||
RGBADir m_PrincipalAxis;
|
RGBADir m_PrincipalAxis;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit = -1);
|
extern uint8 QuantizeChannel(const uint8 val, const uint8 mask, const int pBit = -1);
|
||||||
|
|
|
@ -92,37 +92,37 @@ static inline uint32 popcnt32(uint32 x) {
|
||||||
|
|
||||||
/* Original scalar implementation:
|
/* Original scalar implementation:
|
||||||
|
|
||||||
// If the mask is all the bits, then we can just return the value.
|
// If the mask is all the bits, then we can just return the value.
|
||||||
if(mask == 0xFF) {
|
if(mask == 0xFF) {
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 prec = CountBitsInMask(mask);
|
uint32 prec = CountBitsInMask(mask);
|
||||||
const uint32 step = 1 << (8 - prec);
|
const uint32 step = 1 << (8 - prec);
|
||||||
|
|
||||||
assert(step-1 == uint8(~mask));
|
assert(step-1 == uint8(~mask));
|
||||||
|
|
||||||
uint32 lval = val & mask;
|
uint32 lval = val & mask;
|
||||||
uint32 hval = lval + step;
|
uint32 hval = lval + step;
|
||||||
|
|
||||||
if(pBit >= 0) {
|
if(pBit >= 0) {
|
||||||
prec++;
|
prec++;
|
||||||
lval |= !!(pBit) << (8 - prec);
|
lval |= !!(pBit) << (8 - prec);
|
||||||
hval |= !!(pBit) << (8 - prec);
|
hval |= !!(pBit) << (8 - prec);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(lval > val) {
|
if(lval > val) {
|
||||||
lval -= step;
|
lval -= step;
|
||||||
hval -= step;
|
hval -= step;
|
||||||
}
|
}
|
||||||
|
|
||||||
lval |= lval >> prec;
|
lval |= lval >> prec;
|
||||||
hval |= hval >> prec;
|
hval |= hval >> prec;
|
||||||
|
|
||||||
if(sad(val, lval) < sad(val, hval))
|
if(sad(val, lval) < sad(val, hval))
|
||||||
return lval;
|
return lval;
|
||||||
else
|
else
|
||||||
return hval;
|
return hval;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// !TODO! AVX2 supports an instruction known as vsllv, which shifts a vector
|
// !TODO! AVX2 supports an instruction known as vsllv, which shifts a vector
|
||||||
|
@ -158,114 +158,114 @@ static const ALIGN_SSE uint32 kThirtyTwoVector[4] = { 32, 32, 32, 32 };
|
||||||
static const __m128i kByteValMask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF);
|
static const __m128i kByteValMask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF);
|
||||||
|
|
||||||
static inline __m128i sad(const __m128i &a, const __m128i &b) {
|
static inline __m128i sad(const __m128i &a, const __m128i &b) {
|
||||||
const __m128i maxab = _mm_max_epu8(a, b);
|
const __m128i maxab = _mm_max_epu8(a, b);
|
||||||
const __m128i minab = _mm_min_epu8(a, b);
|
const __m128i minab = _mm_min_epu8(a, b);
|
||||||
return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) );
|
return _mm_and_si128( kByteValMask, _mm_subs_epu8( maxab, minab ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
__m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {
|
__m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {
|
||||||
|
|
||||||
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
||||||
#ifdef HAS_SSE_POPCNT
|
#ifdef HAS_SSE_POPCNT
|
||||||
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
|
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
|
||||||
#else
|
#else
|
||||||
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(r >= 0.0f && r <= 255.0f);
|
assert(r >= 0.0f && r <= 255.0f);
|
||||||
assert(g >= 0.0f && g <= 255.0f);
|
assert(g >= 0.0f && g <= 255.0f);
|
||||||
assert(b >= 0.0f && b <= 255.0f);
|
assert(b >= 0.0f && b <= 255.0f);
|
||||||
assert(a >= 0.0f && a <= 255.0f);
|
assert(a >= 0.0f && a <= 255.0f);
|
||||||
assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
|
assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
|
||||||
assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
|
assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
|
||||||
|
|
||||||
const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
|
const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
|
||||||
|
|
||||||
const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
|
const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
|
||||||
const __m128i &mask = qmask;
|
const __m128i &mask = qmask;
|
||||||
|
|
||||||
__m128i lval = _mm_and_si128(val, mask);
|
__m128i lval = _mm_and_si128(val, mask);
|
||||||
__m128i hval = _mm_add_epi32(lval, step);
|
__m128i hval = _mm_add_epi32(lval, step);
|
||||||
|
|
||||||
const __m128i lvalShift = _mm_srli_epi32(lval, prec);
|
const __m128i lvalShift = _mm_srli_epi32(lval, prec);
|
||||||
const __m128i hvalShift = _mm_srli_epi32(hval, prec);
|
const __m128i hvalShift = _mm_srli_epi32(hval, prec);
|
||||||
|
|
||||||
lval = _mm_or_si128(lval, lvalShift);
|
lval = _mm_or_si128(lval, lvalShift);
|
||||||
hval = _mm_or_si128(hval, hvalShift);
|
hval = _mm_or_si128(hval, hvalShift);
|
||||||
|
|
||||||
const __m128i lvald = _mm_sub_epi32( val, lval );
|
const __m128i lvald = _mm_sub_epi32( val, lval );
|
||||||
const __m128i hvald = _mm_sub_epi32( hval, val );
|
const __m128i hvald = _mm_sub_epi32( hval, val );
|
||||||
|
|
||||||
const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
|
const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
|
||||||
__m128i ans = _mm_blendv_epi8(hval, lval, vd);
|
__m128i ans = _mm_blendv_epi8(hval, lval, vd);
|
||||||
|
|
||||||
const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
|
const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
|
||||||
ans = _mm_blendv_epi8( ans, val, chanExact );
|
ans = _mm_blendv_epi8( ans, val, chanExact );
|
||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
__m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
|
__m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
|
||||||
|
|
||||||
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
||||||
#ifdef HAS_SSE_POPCNT
|
#ifdef HAS_SSE_POPCNT
|
||||||
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
|
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
|
||||||
#else
|
#else
|
||||||
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(r >= 0.0f && r <= 255.0f);
|
assert(r >= 0.0f && r <= 255.0f);
|
||||||
assert(g >= 0.0f && g <= 255.0f);
|
assert(g >= 0.0f && g <= 255.0f);
|
||||||
assert(b >= 0.0f && b <= 255.0f);
|
assert(b >= 0.0f && b <= 255.0f);
|
||||||
assert(a >= 0.0f && a <= 255.0f);
|
assert(a >= 0.0f && a <= 255.0f);
|
||||||
assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
|
assert(((uint32 *)(&qmask))[3] == 0xFF || ((uint32 *)(&qmask))[3] == ((uint32 *)(&qmask))[0]);
|
||||||
assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
|
assert(((uint32 *)(&qmask))[2] == ((uint32 *)(&qmask))[1] && ((uint32 *)(&qmask))[0] == ((uint32 *)(&qmask))[1]);
|
||||||
|
|
||||||
const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
|
const __m128i val = _mm_cvtps_epi32( _mm_add_ps(kHalfVector, vec) );
|
||||||
const __m128i pbit = _mm_set1_epi32(!!pBit);
|
const __m128i pbit = _mm_set1_epi32(!!pBit);
|
||||||
|
|
||||||
const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask);
|
const __m128i &mask = qmask; // _mm_set_epi32(alphaMask, channelMask, channelMask, channelMask);
|
||||||
const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
|
const __m128i step = _mm_slli_epi32( kOneVector, 8 - prec );
|
||||||
|
|
||||||
__m128i lval = _mm_and_si128( val, mask );
|
__m128i lval = _mm_and_si128( val, mask );
|
||||||
__m128i hval = _mm_add_epi32( lval, step );
|
__m128i hval = _mm_add_epi32( lval, step );
|
||||||
|
|
||||||
const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec);
|
const __m128i pBitShifted = _mm_slli_epi32(pbit, 7 - prec);
|
||||||
lval = _mm_or_si128(lval, pBitShifted );
|
lval = _mm_or_si128(lval, pBitShifted );
|
||||||
hval = _mm_or_si128(hval, pBitShifted);
|
hval = _mm_or_si128(hval, pBitShifted);
|
||||||
|
|
||||||
// These next three lines we make sure that after adding the pbit that val is
|
// These next three lines we make sure that after adding the pbit that val is
|
||||||
// still in between lval and hval. If it isn't, then we subtract a
|
// still in between lval and hval. If it isn't, then we subtract a
|
||||||
// step from both. Now, val should be larger than lval and less than
|
// step from both. Now, val should be larger than lval and less than
|
||||||
// hval, but certain situations make this not always the case (e.g. val
|
// hval, but certain situations make this not always the case (e.g. val
|
||||||
// is 0, precision is 4 bits, and pbit is 1). Hence, we add back the
|
// is 0, precision is 4 bits, and pbit is 1). Hence, we add back the
|
||||||
// step if it goes below zero, making it equivalent to hval and so it
|
// step if it goes below zero, making it equivalent to hval and so it
|
||||||
// doesn't matter which we choose.
|
// doesn't matter which we choose.
|
||||||
{
|
{
|
||||||
__m128i cmp = _mm_cmpgt_epi32(lval, val);
|
__m128i cmp = _mm_cmpgt_epi32(lval, val);
|
||||||
cmp = _mm_mullo_epi32(cmp, step);
|
cmp = _mm_mullo_epi32(cmp, step);
|
||||||
lval = _mm_add_epi32(lval, cmp);
|
lval = _mm_add_epi32(lval, cmp);
|
||||||
hval = _mm_add_epi32(hval, cmp);
|
hval = _mm_add_epi32(hval, cmp);
|
||||||
|
|
||||||
cmp = _mm_cmplt_epi32(lval, kZeroVector);
|
cmp = _mm_cmplt_epi32(lval, kZeroVector);
|
||||||
cmp = _mm_mullo_epi32(cmp, step);
|
cmp = _mm_mullo_epi32(cmp, step);
|
||||||
lval = _mm_sub_epi32(lval, cmp);
|
lval = _mm_sub_epi32(lval, cmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1);
|
const __m128i lvalShift = _mm_srli_epi32(lval, prec + 1);
|
||||||
const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1);
|
const __m128i hvalShift = _mm_srli_epi32(hval, prec + 1);
|
||||||
|
|
||||||
lval = _mm_or_si128(lval, lvalShift);
|
lval = _mm_or_si128(lval, lvalShift);
|
||||||
hval = _mm_or_si128(hval, hvalShift);
|
hval = _mm_or_si128(hval, hvalShift);
|
||||||
|
|
||||||
const __m128i lvald = _mm_sub_epi32( val, lval );
|
const __m128i lvald = _mm_sub_epi32( val, lval );
|
||||||
const __m128i hvald = _mm_sub_epi32( hval, val );
|
const __m128i hvald = _mm_sub_epi32( hval, val );
|
||||||
|
|
||||||
const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
|
const __m128i vd = _mm_cmplt_epi32(lvald, hvald);
|
||||||
__m128i ans = _mm_blendv_epi8(hval, lval, vd);
|
__m128i ans = _mm_blendv_epi8(hval, lval, vd);
|
||||||
|
|
||||||
const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
|
const __m128i chanExact = _mm_cmpeq_epi32(mask, kByteValMask);
|
||||||
ans = _mm_blendv_epi8( ans, val, chanExact );
|
ans = _mm_blendv_epi8( ans, val, chanExact );
|
||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -276,17 +276,17 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
|
||||||
|
|
||||||
// Multiplies this matrix by the vector p. Each component of p is broadcast
// across a register and used to scale the matching column of the matrix; the
// four scaled columns are then added together pairwise.
RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const {

  // Scale every column by its corresponding (broadcast) vector component.
  const __m128 scaledCol0 = _mm_mul_ps( _mm_set1_ps( p.x ), col[0] );
  const __m128 scaledCol1 = _mm_mul_ps( _mm_set1_ps( p.y ), col[1] );
  const __m128 scaledCol2 = _mm_mul_ps( _mm_set1_ps( p.z ), col[2] );
  const __m128 scaledCol3 = _mm_mul_ps( _mm_set1_ps( p.w ), col[3] );

  // Keep the (0 + 1) + (2 + 3) association so that rounding is unchanged.
  const __m128 lowerSum = _mm_add_ps( scaledCol0, scaledCol1 );
  const __m128 upperSum = _mm_add_ps( scaledCol2, scaledCol3 );

  return RGBAVectorSIMD( _mm_add_ps( lowerSum, upperSum ) );
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -297,104 +297,104 @@ RGBAVectorSIMD RGBAMatrixSIMD::operator *(const RGBAVectorSIMD &p) const {
|
||||||
|
|
||||||
// Builds a cluster that holds the points of 'left' followed by the points of
// 'right'. The two inputs must not have any bit of their point bit strings in
// common (checked by the assert below).
RGBAClusterSIMD::RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right) {

  assert(!(left.m_PointBitString & right.m_PointBitString));

  // Start out as a copy of the left cluster...
  *this = left;

  // ...then append every point of the right cluster, updating the running
  // total and the bounding box as we go.
  const int numRightPoints = right.m_NumPoints;
  for(int idx = 0; idx < numRightPoints; ++idx) {

    const RGBAVectorSIMD &pt = right.m_DataPoints[idx];

    assert(m_NumPoints < kMaxNumDataPoints);
    m_Total += pt;
    m_DataPoints[m_NumPoints] = pt;
    m_NumPoints++;

    m_Min.vec = _mm_min_ps(m_Min.vec, pt.vec);
    m_Max.vec = _mm_max_ps(m_Max.vec, pt.vec);
  }

  // The merged cluster covers the union of both point sets, and any axis
  // copied over from 'left' no longer applies.
  m_PointBitString = left.m_PointBitString | right.m_PointBitString;
  m_PrincipalAxisCached = false;
}
|
||||||
|
|
||||||
void RGBAClusterSIMD::AddPoint(const RGBAVectorSIMD &p, int idx) {
|
void RGBAClusterSIMD::AddPoint(const RGBAVectorSIMD &p, int idx) {
|
||||||
assert(m_NumPoints < kMaxNumDataPoints);
|
assert(m_NumPoints < kMaxNumDataPoints);
|
||||||
m_Total += p;
|
m_Total += p;
|
||||||
m_DataPoints[m_NumPoints++] = p;
|
m_DataPoints[m_NumPoints++] = p;
|
||||||
m_PointBitString |= 1 << idx;
|
m_PointBitString |= 1 << idx;
|
||||||
|
|
||||||
m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
|
m_Min.vec = _mm_min_ps(m_Min.vec, p.vec);
|
||||||
m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
|
m_Max.vec = _mm_max_ps(m_Max.vec, p.vec);
|
||||||
}
|
}
|
||||||
|
|
||||||
float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2], __m128i *indices) const {
|
float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2], __m128i *indices) const {
|
||||||
|
|
||||||
// nBuckets should be a power of two.
|
// nBuckets should be a power of two.
|
||||||
assert(!(nBuckets & (nBuckets - 1)));
|
assert(!(nBuckets & (nBuckets - 1)));
|
||||||
|
|
||||||
#ifdef HAS_SSE_POPCNT
|
#ifdef HAS_SSE_POPCNT
|
||||||
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
|
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
|
||||||
#else
|
#else
|
||||||
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
|
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
|
||||||
#endif
|
#endif
|
||||||
assert(indexPrec >= 2 && indexPrec <= 4);
|
assert(indexPrec >= 2 && indexPrec <= 4);
|
||||||
|
|
||||||
typedef __m128i tInterpPair[2];
|
typedef __m128i tInterpPair[2];
|
||||||
typedef tInterpPair tInterpLevel[16];
|
typedef tInterpPair tInterpLevel[16];
|
||||||
const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1);
|
const tInterpLevel *interpVals = kBC7InterpolationValuesSIMD + (indexPrec - 1);
|
||||||
|
|
||||||
__m128i qp1, qp2;
|
__m128i qp1, qp2;
|
||||||
if(pbits) {
|
if(pbits) {
|
||||||
qp1 = p1.ToPixel(bitMask, pbits[0]);
|
qp1 = p1.ToPixel(bitMask, pbits[0]);
|
||||||
qp2 = p2.ToPixel(bitMask, pbits[1]);
|
qp2 = p2.ToPixel(bitMask, pbits[1]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
qp1 = p1.ToPixel(bitMask);
|
qp1 = p1.ToPixel(bitMask);
|
||||||
qp2 = p2.ToPixel(bitMask);
|
qp2 = p2.ToPixel(bitMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
__m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() );
|
__m128 errorMetricVec = _mm_load_ps( BC7C::GetErrorMetric() );
|
||||||
|
|
||||||
__m128 totalError = kZero;
|
__m128 totalError = kZero;
|
||||||
for(int i = 0; i < m_NumPoints; i++) {
|
for(int i = 0; i < m_NumPoints; i++) {
|
||||||
|
|
||||||
const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask );
|
const __m128i pixel = m_DataPoints[i].ToPixel( kByteValMask );
|
||||||
|
|
||||||
__m128 minError = _mm_set1_ps(FLT_MAX);
|
__m128 minError = _mm_set1_ps(FLT_MAX);
|
||||||
__m128i bestBucket = _mm_set1_epi32(-1);
|
__m128i bestBucket = _mm_set1_epi32(-1);
|
||||||
for(int j = 0; j < nBuckets; j++) {
|
for(int j = 0; j < nBuckets; j++) {
|
||||||
|
|
||||||
const __m128i jVec = _mm_set1_epi32(j);
|
const __m128i jVec = _mm_set1_epi32(j);
|
||||||
const __m128i interp0 = (*interpVals)[j][0];
|
const __m128i interp0 = (*interpVals)[j][0];
|
||||||
const __m128i interp1 = (*interpVals)[j][1];
|
const __m128i interp1 = (*interpVals)[j][1];
|
||||||
|
|
||||||
const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 );
|
const __m128i ip0 = _mm_mullo_epi32( qp1, interp0 );
|
||||||
const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 );
|
const __m128i ip1 = _mm_mullo_epi32( qp2, interp1 );
|
||||||
const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) );
|
const __m128i ip = _mm_add_epi32( *((const __m128i *)kThirtyTwoVector), _mm_add_epi32( ip0, ip1 ) );
|
||||||
const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel );
|
const __m128i dist = sad( _mm_and_si128( _mm_srli_epi32( ip, 6 ), kByteValMask ), pixel );
|
||||||
__m128 errorVec = _mm_cvtepi32_ps( dist );
|
__m128 errorVec = _mm_cvtepi32_ps( dist );
|
||||||
|
|
||||||
errorVec = _mm_mul_ps( errorVec, errorMetricVec );
|
errorVec = _mm_mul_ps( errorVec, errorMetricVec );
|
||||||
errorVec = _mm_mul_ps( errorVec, errorVec );
|
errorVec = _mm_mul_ps( errorVec, errorVec );
|
||||||
errorVec = _mm_hadd_ps( errorVec, errorVec );
|
errorVec = _mm_hadd_ps( errorVec, errorVec );
|
||||||
errorVec = _mm_hadd_ps( errorVec, errorVec );
|
errorVec = _mm_hadd_ps( errorVec, errorVec );
|
||||||
|
|
||||||
const __m128 cmp = _mm_cmple_ps( errorVec, minError );
|
const __m128 cmp = _mm_cmple_ps( errorVec, minError );
|
||||||
minError = _mm_blendv_ps( minError, errorVec, cmp );
|
minError = _mm_blendv_ps( minError, errorVec, cmp );
|
||||||
bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) );
|
bestBucket = _mm_blendv_epi8( bestBucket, jVec, _mm_castps_si128( cmp ) );
|
||||||
|
|
||||||
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
|
// Conceptually, once the error starts growing, it doesn't stop growing (we're moving
|
||||||
// farther away from the reference point along the line). Hence we can early out here.
|
// farther away from the reference point along the line). Hence we can early out here.
|
||||||
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
|
// However, quanitzation artifacts mean that this is not ALWAYS the case, so we do suffer
|
||||||
// about 0.01 RMS error.
|
// about 0.01 RMS error.
|
||||||
if(!((uint8 *)(&cmp))[0])
|
if(!((uint8 *)(&cmp))[0])
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
totalError = _mm_add_ps(totalError, minError);
|
totalError = _mm_add_ps(totalError, minError);
|
||||||
if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0];
|
if(indices) ((uint32 *)indices)[i] = ((uint32 *)(&bestBucket))[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
return ((float *)(&totalError))[0];
|
return ((float *)(&totalError))[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -404,69 +404,69 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// Clamps every channel of both endpoints to the range [kZero, kByteMax].
void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2) {

  // Raise anything below the lower bound first...
  const __m128 floored1 = _mm_max_ps( p1.vec, kZero );
  p1.vec = _mm_min_ps( kByteMax, floored1 );

  // ...then cap at the upper bound; same treatment for the second endpoint.
  const __m128 floored2 = _mm_max_ps( p2.vec, kZero );
  p2.vec = _mm_min_ps( kByteMax, floored2 );
}
|
||||||
|
|
||||||
void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis) {
|
void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis) {
|
||||||
|
|
||||||
if(c.GetNumPoints() == 2) {
|
if(c.GetNumPoints() == 2) {
|
||||||
axis = c.GetPoint(1) - c.GetPoint(0);
|
axis = c.GetPoint(1) - c.GetPoint(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD avg = c.GetTotal();
|
RGBAVectorSIMD avg = c.GetTotal();
|
||||||
avg /= float(c.GetNumPoints());
|
avg /= float(c.GetNumPoints());
|
||||||
|
|
||||||
// We use these vectors for calculating the covariance matrix...
|
// We use these vectors for calculating the covariance matrix...
|
||||||
RGBAVectorSIMD toPts[kMaxNumDataPoints];
|
RGBAVectorSIMD toPts[kMaxNumDataPoints];
|
||||||
RGBAVectorSIMD toPtsMax(-FLT_MAX);
|
RGBAVectorSIMD toPtsMax(-FLT_MAX);
|
||||||
for(int i = 0; i < c.GetNumPoints(); i++) {
|
for(int i = 0; i < c.GetNumPoints(); i++) {
|
||||||
toPts[i] = c.GetPoint(i) - avg;
|
toPts[i] = c.GetPoint(i) - avg;
|
||||||
toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec);
|
toPtsMax.vec = _mm_max_ps(toPtsMax.vec, toPts[i].vec);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD covMatrix;
|
RGBAMatrixSIMD covMatrix;
|
||||||
|
|
||||||
// Compute covariance.
|
// Compute covariance.
|
||||||
const float fNumPoints = float(c.GetNumPoints());
|
const float fNumPoints = float(c.GetNumPoints());
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
for(int j = 0; j <= i; j++) {
|
for(int j = 0; j <= i; j++) {
|
||||||
|
|
||||||
float sum = 0.0;
|
float sum = 0.0;
|
||||||
for(int k = 0; k < c.GetNumPoints(); k++) {
|
for(int k = 0; k < c.GetNumPoints(); k++) {
|
||||||
sum += toPts[k].c[i] * toPts[k].c[j];
|
sum += toPts[k].c[i] * toPts[k].c[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
covMatrix(i, j) = sum / fNumPoints;
|
covMatrix(i, j) = sum / fNumPoints;
|
||||||
covMatrix(j, i) = covMatrix(i, j);
|
covMatrix(j, i) = covMatrix(i, j);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// !SPEED! Find eigenvectors by using the power method. This is good because the
|
// !SPEED! Find eigenvectors by using the power method. This is good because the
|
||||||
// matrix is only 4x4, which allows us to use SIMD...
|
// matrix is only 4x4, which allows us to use SIMD...
|
||||||
RGBAVectorSIMD b = toPtsMax;
|
RGBAVectorSIMD b = toPtsMax;
|
||||||
assert(b.Length() > 0);
|
assert(b.Length() > 0);
|
||||||
b /= b.Length();
|
b /= b.Length();
|
||||||
|
|
||||||
RGBAVectorSIMD newB = covMatrix * b;
|
RGBAVectorSIMD newB = covMatrix * b;
|
||||||
|
|
||||||
// !HACK! If the principal eigenvector of the covariance matrix
|
// !HACK! If the principal eigenvector of the covariance matrix
|
||||||
// converges to zero, that means that the points lie equally
|
// converges to zero, that means that the points lie equally
|
||||||
// spaced on a sphere in this space. In this (extremely rare)
|
// spaced on a sphere in this space. In this (extremely rare)
|
||||||
// situation, just choose a point and use it as the principal
|
// situation, just choose a point and use it as the principal
|
||||||
// direction.
|
// direction.
|
||||||
const float newBlen = newB.Length();
|
const float newBlen = newB.Length();
|
||||||
if(newBlen < 1e-10) {
|
if(newBlen < 1e-10) {
|
||||||
axis = toPts[0];
|
axis = toPts[0];
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i = 0; i < 8; i++) {
|
for(int i = 0; i < 8; i++) {
|
||||||
newB = covMatrix * b;
|
newB = covMatrix * b;
|
||||||
newB.Normalize();
|
newB.Normalize();
|
||||||
b = newB;
|
b = newB;
|
||||||
}
|
}
|
||||||
|
|
||||||
axis = b;
|
axis = b;
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,270 +81,270 @@ static const __m128 kEpsilonSIMD = _mm_set1_ps(1e-8f);
|
||||||
class RGBAVectorSIMD {
|
class RGBAVectorSIMD {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
union {
|
union {
|
||||||
struct { float r, g, b, a; };
|
struct { float r, g, b, a; };
|
||||||
struct { float x, y, z, w; };
|
struct { float x, y, z, w; };
|
||||||
float c[4];
|
float c[4];
|
||||||
__m128 vec;
|
__m128 vec;
|
||||||
};
|
};
|
||||||
|
|
||||||
RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
|
RGBAVectorSIMD() : r(-1.0), g(-1.0), b(-1.0), a(-1.0) { }
|
||||||
RGBAVectorSIMD(uint32 pixel) :
|
RGBAVectorSIMD(uint32 pixel) :
|
||||||
r(float(pixel & 0xFF)),
|
r(float(pixel & 0xFF)),
|
||||||
g(float((pixel >> 8) & 0xFF)),
|
g(float((pixel >> 8) & 0xFF)),
|
||||||
b(float((pixel >> 16) & 0xFF)),
|
b(float((pixel >> 16) & 0xFF)),
|
||||||
a(float((pixel >> 24) & 0xFF))
|
a(float((pixel >> 24) & 0xFF))
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) :
|
explicit RGBAVectorSIMD(float _r, float _g, float _b, float _a) :
|
||||||
r(_r), g(_g), b(_b), a(_a) { }
|
r(_r), g(_g), b(_b), a(_a) { }
|
||||||
|
|
||||||
explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { }
|
explicit RGBAVectorSIMD(float cc) : r(cc), g(cc), b(cc), a(cc) { }
|
||||||
|
|
||||||
RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { }
|
RGBAVectorSIMD (const __m128 &newVec) : vec(newVec) { }
|
||||||
RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { }
|
RGBAVectorSIMD (const RGBAVectorSIMD &other) : vec(other.vec) { }
|
||||||
|
|
||||||
RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const {
|
RGBAVectorSIMD operator +(const RGBAVectorSIMD &p) const {
|
||||||
return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) );
|
return RGBAVectorSIMD( _mm_add_ps(this->vec, p.vec) );
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) {
|
RGBAVectorSIMD &operator +=(const RGBAVectorSIMD &p) {
|
||||||
this->vec = _mm_add_ps(this->vec, p.vec);
|
this->vec = _mm_add_ps(this->vec, p.vec);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const {
|
RGBAVectorSIMD operator -(const RGBAVectorSIMD &p) const {
|
||||||
return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) );
|
return RGBAVectorSIMD( _mm_sub_ps(this->vec, p.vec) );
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) {
|
RGBAVectorSIMD &operator -=(const RGBAVectorSIMD &p) {
|
||||||
this->vec = _mm_sub_ps(this->vec, p.vec);
|
this->vec = _mm_sub_ps(this->vec, p.vec);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD operator /(const float s) const {
|
RGBAVectorSIMD operator /(const float s) const {
|
||||||
return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) );
|
return RGBAVectorSIMD( _mm_div_ps(this->vec, _mm_set1_ps(s) ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD &operator /=(const float s) {
|
RGBAVectorSIMD &operator /=(const float s) {
|
||||||
this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) );
|
this->vec = _mm_div_ps(this->vec, _mm_set1_ps(s) );
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
float operator *(const RGBAVectorSIMD &p) const {
|
float operator *(const RGBAVectorSIMD &p) const {
|
||||||
__m128 mul = _mm_mul_ps(this->vec, p.vec);
|
__m128 mul = _mm_mul_ps(this->vec, p.vec);
|
||||||
mul = _mm_hadd_ps(mul, mul);
|
mul = _mm_hadd_ps(mul, mul);
|
||||||
mul = _mm_hadd_ps(mul, mul);
|
mul = _mm_hadd_ps(mul, mul);
|
||||||
return ((float *)(&mul))[0];
|
return ((float *)(&mul))[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
void Normalize() {
|
void Normalize() {
|
||||||
__m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) );
|
__m128 rsqrt = _mm_rsqrt_ps( _mm_set1_ps( (*this) * (*this) ) );
|
||||||
vec = _mm_mul_ps( vec, rsqrt );
|
vec = _mm_mul_ps( vec, rsqrt );
|
||||||
}
|
}
|
||||||
|
|
||||||
float Length() const {
|
float Length() const {
|
||||||
return sqrt((*this) * (*this));
|
return sqrt((*this) * (*this));
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) {
|
RGBAVectorSIMD &operator *=(const RGBAVectorSIMD &v) {
|
||||||
this->vec = _mm_mul_ps(this->vec, v.vec);
|
this->vec = _mm_mul_ps(this->vec, v.vec);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD operator *(const float s) const {
|
RGBAVectorSIMD operator *(const float s) const {
|
||||||
return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) );
|
return RGBAVectorSIMD( _mm_mul_ps( this->vec, _mm_set1_ps(s) ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) {
|
friend RGBAVectorSIMD operator *(const float s, const RGBAVectorSIMD &p) {
|
||||||
return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) );
|
return RGBAVectorSIMD( _mm_mul_ps( p.vec, _mm_set1_ps(s) ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD &operator *=(const float s) {
|
RGBAVectorSIMD &operator *=(const float s) {
|
||||||
this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) );
|
this->vec = _mm_mul_ps( this->vec, _mm_set1_ps(s) );
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
float &operator [](const int i) {
|
float &operator [](const int i) {
|
||||||
return c[i];
|
return c[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
|
friend bool operator ==(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
|
||||||
__m128 d = _mm_sub_ps(rhs.vec, lhs.vec);
|
__m128 d = _mm_sub_ps(rhs.vec, lhs.vec);
|
||||||
d = _mm_mul_ps(d, d);
|
d = _mm_mul_ps(d, d);
|
||||||
__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
|
__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
|
||||||
cmp = _mm_hadd_ps(cmp, cmp);
|
cmp = _mm_hadd_ps(cmp, cmp);
|
||||||
cmp = _mm_hadd_ps(cmp, cmp);
|
cmp = _mm_hadd_ps(cmp, cmp);
|
||||||
return ((float *)(&cmp))[0] == 0.0f;
|
return ((float *)(&cmp))[0] == 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
|
friend bool operator !=(const RGBAVectorSIMD &rhs, const RGBAVectorSIMD &lhs) {
|
||||||
return !(rhs == lhs);
|
return !(rhs == lhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
operator float *() {
|
operator float *() {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quantize this point.
|
// Quantize this point.
|
||||||
__m128i ToPixel(const __m128i &channelMask, const int pBit) const;
|
__m128i ToPixel(const __m128i &channelMask, const int pBit) const;
|
||||||
__m128i ToPixel(const __m128i &channelMask) const;
|
__m128i ToPixel(const __m128i &channelMask) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RGBAMatrixSIMD {
|
class RGBAMatrixSIMD {
|
||||||
private:
|
private:
|
||||||
union {
|
union {
|
||||||
float m[kNumColorChannels*kNumColorChannels];
|
float m[kNumColorChannels*kNumColorChannels];
|
||||||
struct {
|
struct {
|
||||||
float m1, m5, m9, m13;
|
float m1, m5, m9, m13;
|
||||||
float m2, m6, m10, m14;
|
float m2, m6, m10, m14;
|
||||||
float m3, m7, m11, m15;
|
float m3, m7, m11, m15;
|
||||||
float m4, m8, m12, m16;
|
float m4, m8, m12, m16;
|
||||||
};
|
};
|
||||||
__m128 col[kNumColorChannels];
|
__m128 col[kNumColorChannels];
|
||||||
};
|
};
|
||||||
|
|
||||||
RGBAMatrixSIMD(const float *arr) {
|
RGBAMatrixSIMD(const float *arr) {
|
||||||
memcpy(m, arr, sizeof(m));
|
memcpy(m, arr, sizeof(m));
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) {
|
RGBAMatrixSIMD(const __m128 newcol[kNumColorChannels]) {
|
||||||
for(int i = 0; i < kNumColorChannels; i++)
|
for(int i = 0; i < kNumColorChannels; i++)
|
||||||
col[i] = newcol[i];
|
col[i] = newcol[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
RGBAMatrixSIMD() :
|
RGBAMatrixSIMD() :
|
||||||
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
|
m1(1.0f), m2(0.0f), m3(0.0f), m4(0.0f),
|
||||||
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
|
m5(0.0f), m6(1.0f), m7(0.0f), m8(0.0f),
|
||||||
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
|
m9(0.0f), m10(0.0f), m11(1.0f), m12(0.0f),
|
||||||
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
|
m13(0.0f), m14(0.0f), m15(0.0f), m16(1.0f)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) {
|
RGBAMatrixSIMD &operator =(const RGBAMatrixSIMD &other) {
|
||||||
memcpy(m, other.m, sizeof(m));
|
memcpy(m, other.m, sizeof(m));
|
||||||
return (*this);
|
return (*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const {
|
RGBAMatrixSIMD operator +(const RGBAMatrixSIMD &p) const {
|
||||||
RGBAMatrixSIMD newm;
|
RGBAMatrixSIMD newm;
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
newm.col[i] = _mm_add_ps(col[i], p.col[i]);
|
newm.col[i] = _mm_add_ps(col[i], p.col[i]);
|
||||||
}
|
}
|
||||||
return newm;
|
return newm;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) {
|
RGBAMatrixSIMD &operator +=(const RGBAMatrixSIMD &p) {
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
col[i] = _mm_add_ps( col[i], p.col[i] );
|
col[i] = _mm_add_ps( col[i], p.col[i] );
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const {
|
RGBAMatrixSIMD operator -(const RGBAMatrixSIMD &p) const {
|
||||||
RGBAMatrixSIMD newm;
|
RGBAMatrixSIMD newm;
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
newm.col[i] = _mm_sub_ps( col[i], p.col[i] );
|
newm.col[i] = _mm_sub_ps( col[i], p.col[i] );
|
||||||
}
|
}
|
||||||
return newm;
|
return newm;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) {
|
RGBAMatrixSIMD &operator -=(const RGBAMatrixSIMD &p) {
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
col[i] = _mm_sub_ps( col[i], p.col[i] );
|
col[i] = _mm_sub_ps( col[i], p.col[i] );
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD operator /(const float s) const {
|
RGBAMatrixSIMD operator /(const float s) const {
|
||||||
__m128 f = _mm_set1_ps(s);
|
__m128 f = _mm_set1_ps(s);
|
||||||
RGBAMatrixSIMD newm;
|
RGBAMatrixSIMD newm;
|
||||||
|
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
newm.col[i] = _mm_div_ps( col[i], f );
|
newm.col[i] = _mm_div_ps( col[i], f );
|
||||||
}
|
}
|
||||||
|
|
||||||
return newm;
|
return newm;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD &operator /=(const float s) {
|
RGBAMatrixSIMD &operator /=(const float s) {
|
||||||
|
|
||||||
__m128 f = _mm_set1_ps(s);
|
__m128 f = _mm_set1_ps(s);
|
||||||
|
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
col[i] = _mm_div_ps(col[i], f);
|
col[i] = _mm_div_ps(col[i], f);
|
||||||
}
|
}
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD operator *(const float s) const {
|
RGBAMatrixSIMD operator *(const float s) const {
|
||||||
__m128 f = _mm_set1_ps(s);
|
__m128 f = _mm_set1_ps(s);
|
||||||
|
|
||||||
RGBAMatrixSIMD newm;
|
RGBAMatrixSIMD newm;
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
newm.col[i] = _mm_mul_ps( col[i], f );
|
newm.col[i] = _mm_mul_ps( col[i], f );
|
||||||
}
|
}
|
||||||
return newm;
|
return newm;
|
||||||
}
|
}
|
||||||
|
|
||||||
friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) {
|
friend RGBAMatrixSIMD operator *(const float s, const RGBAMatrixSIMD &p) {
|
||||||
__m128 f = _mm_set1_ps(s);
|
__m128 f = _mm_set1_ps(s);
|
||||||
RGBAMatrixSIMD newm;
|
RGBAMatrixSIMD newm;
|
||||||
|
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
newm.col[i] = _mm_mul_ps( p.col[i], f );
|
newm.col[i] = _mm_mul_ps( p.col[i], f );
|
||||||
}
|
}
|
||||||
return newm;
|
return newm;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAMatrixSIMD &operator *=(const float s) {
|
RGBAMatrixSIMD &operator *=(const float s) {
|
||||||
__m128 f = _mm_set1_ps(s);
|
__m128 f = _mm_set1_ps(s);
|
||||||
for(int i = 0; i < kNumColorChannels; i++)
|
for(int i = 0; i < kNumColorChannels; i++)
|
||||||
col[i] = _mm_mul_ps(col[i], f);
|
col[i] = _mm_mul_ps(col[i], f);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
float &operator ()(const int i, const int j) {
|
float &operator ()(const int i, const int j) {
|
||||||
return (*this)[j*4 + i];
|
return (*this)[j*4 + i];
|
||||||
}
|
}
|
||||||
|
|
||||||
float &operator [](const int i) {
|
float &operator [](const int i) {
|
||||||
return m[i];
|
return m[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) {
|
friend bool operator ==(const RGBAMatrixSIMD &rhs, const RGBAMatrixSIMD &lhs) {
|
||||||
|
|
||||||
__m128 sum = _mm_set1_ps(0.0f);
|
__m128 sum = _mm_set1_ps(0.0f);
|
||||||
for(int i = 0; i < kNumColorChannels; i++) {
|
for(int i = 0; i < kNumColorChannels; i++) {
|
||||||
__m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]);
|
__m128 d = _mm_sub_ps(rhs.col[i], lhs.col[i]);
|
||||||
d = _mm_mul_ps(d, d);
|
d = _mm_mul_ps(d, d);
|
||||||
__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
|
__m128 cmp = _mm_cmpgt_ps(d, kEpsilonSIMD);
|
||||||
cmp = _mm_hadd_ps(cmp, cmp);
|
cmp = _mm_hadd_ps(cmp, cmp);
|
||||||
cmp = _mm_hadd_ps(cmp, cmp);
|
cmp = _mm_hadd_ps(cmp, cmp);
|
||||||
sum = _mm_add_ps(sum, cmp);
|
sum = _mm_add_ps(sum, cmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(((float *)(&sum))[0] != 0)
|
if(((float *)(&sum))[0] != 0)
|
||||||
return false;
|
return false;
|
||||||
else
|
else
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
operator float *() {
|
operator float *() {
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const;
|
RGBAVectorSIMD operator *(const RGBAVectorSIMD &p) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RGBADirSIMD : public RGBAVectorSIMD {
|
class RGBADirSIMD : public RGBAVectorSIMD {
|
||||||
public:
|
public:
|
||||||
RGBADirSIMD() : RGBAVectorSIMD() { }
|
RGBADirSIMD() : RGBAVectorSIMD() { }
|
||||||
RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) {
|
RGBADirSIMD(const RGBAVectorSIMD &p) : RGBAVectorSIMD(p) {
|
||||||
this->Normalize();
|
this->Normalize();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Makes sure that every channel of both endpoints lies between kZero and kByteMax.
|
||||||
|
@ -353,69 +353,69 @@ extern void ClampEndpoints(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2);
|
||||||
class RGBAClusterSIMD {
|
class RGBAClusterSIMD {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
RGBAClusterSIMD() :
|
RGBAClusterSIMD() :
|
||||||
m_NumPoints(0), m_Total(0.0f),
|
m_NumPoints(0), m_Total(0.0f),
|
||||||
m_PointBitString(0),
|
m_PointBitString(0),
|
||||||
m_Min(FLT_MAX),
|
m_Min(FLT_MAX),
|
||||||
m_Max(-FLT_MAX),
|
m_Max(-FLT_MAX),
|
||||||
m_PrincipalAxisCached(false)
|
m_PrincipalAxisCached(false)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
RGBAClusterSIMD(const RGBAClusterSIMD &c) :
|
RGBAClusterSIMD(const RGBAClusterSIMD &c) :
|
||||||
m_NumPoints(c.m_NumPoints),
|
m_NumPoints(c.m_NumPoints),
|
||||||
m_Total(c.m_Total),
|
m_Total(c.m_Total),
|
||||||
m_PointBitString(c.m_PointBitString),
|
m_PointBitString(c.m_PointBitString),
|
||||||
m_Min(c.m_Min),
|
m_Min(c.m_Min),
|
||||||
m_Max(c.m_Max),
|
m_Max(c.m_Max),
|
||||||
m_PrincipalAxisCached(false)
|
m_PrincipalAxisCached(false)
|
||||||
{
|
{
|
||||||
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD));
|
memcpy(this->m_DataPoints, c.m_DataPoints, m_NumPoints * sizeof(RGBAVectorSIMD));
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right);
|
RGBAClusterSIMD(const RGBAClusterSIMD &left, const RGBAClusterSIMD &right);
|
||||||
RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) :
|
RGBAClusterSIMD(const RGBAVectorSIMD &p, int idx) :
|
||||||
m_NumPoints(1),
|
m_NumPoints(1),
|
||||||
m_Total(p),
|
m_Total(p),
|
||||||
m_PointBitString(0),
|
m_PointBitString(0),
|
||||||
m_Min(p), m_Max(p),
|
m_Min(p), m_Max(p),
|
||||||
m_PrincipalAxisCached(false)
|
m_PrincipalAxisCached(false)
|
||||||
{
|
{
|
||||||
m_DataPoints[0] = p;
|
m_DataPoints[0] = p;
|
||||||
m_PointBitString |= (1 << idx);
|
m_PointBitString |= (1 << idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
RGBAVectorSIMD GetTotal() const { return m_Total; }
|
RGBAVectorSIMD GetTotal() const { return m_Total; }
|
||||||
const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; }
|
const RGBAVectorSIMD &GetPoint(int idx) const { return m_DataPoints[idx]; }
|
||||||
int GetNumPoints() const { return m_NumPoints; }
|
int GetNumPoints() const { return m_NumPoints; }
|
||||||
RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); }
|
RGBAVectorSIMD GetAvg() const { return m_Total / float(m_NumPoints); }
|
||||||
|
|
||||||
void AddPoint(const RGBAVectorSIMD &p, int idx);
|
void AddPoint(const RGBAVectorSIMD &p, int idx);
|
||||||
|
|
||||||
void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const {
|
void GetBoundingBox(RGBAVectorSIMD &Min, RGBAVectorSIMD &Max) const {
|
||||||
Min = m_Min, Max = m_Max;
|
Min = m_Min, Max = m_Max;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
|
// Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
|
||||||
float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const;
|
float QuantizedError(const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, const uint8 nBuckets, const __m128i &bitMask, const int pbits[2] = NULL, __m128i *indices = NULL) const;
|
||||||
|
|
||||||
bool AllSamePoint() const { return m_Max == m_Min; }
|
bool AllSamePoint() const { return m_Max == m_Min; }
|
||||||
int GetPointBitString() const { return m_PointBitString; }
|
int GetPointBitString() const { return m_PointBitString; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
// The number of points in the cluster.
|
// The number of points in the cluster.
|
||||||
int m_NumPoints;
|
int m_NumPoints;
|
||||||
|
|
||||||
RGBAVectorSIMD m_Total;
|
RGBAVectorSIMD m_Total;
|
||||||
|
|
||||||
// The points in the cluster.
|
// The points in the cluster.
|
||||||
RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints];
|
RGBAVectorSIMD m_DataPoints[kMaxNumDataPoints];
|
||||||
|
|
||||||
RGBAVectorSIMD m_Min, m_Max;
|
RGBAVectorSIMD m_Min, m_Max;
|
||||||
int m_PointBitString;
|
int m_PointBitString;
|
||||||
|
|
||||||
RGBADirSIMD m_PrincipalAxis;
|
RGBADirSIMD m_PrincipalAxis;
|
||||||
bool m_PrincipalAxisCached;
|
bool m_PrincipalAxisCached;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis);
|
extern void GetPrincipalAxis(const RGBAClusterSIMD &c, RGBADirSIMD &axis);
|
||||||
|
|
|
@ -64,18 +64,18 @@ void PrintUsage() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExtractBasename(const char *filename, char *buf, uint32 bufSz) {
|
void ExtractBasename(const char *filename, char *buf, uint32 bufSz) {
|
||||||
size_t len = strlen(filename);
|
size_t len = strlen(filename);
|
||||||
const char *end = filename + len;
|
const char *end = filename + len;
|
||||||
while(--end != filename) {
|
while(--end != filename) {
|
||||||
if(*end == '.')
|
if(*end == '.')
|
||||||
{
|
{
|
||||||
uint32 numChars = int32(end - filename + 1);
|
uint32 numChars = int32(end - filename + 1);
|
||||||
uint32 toCopy = (numChars > bufSz)? bufSz : numChars;
|
uint32 toCopy = (numChars > bufSz)? bufSz : numChars;
|
||||||
memcpy(buf, filename, toCopy);
|
memcpy(buf, filename, toCopy);
|
||||||
buf[toCopy - 1] = '\0';
|
buf[toCopy - 1] = '\0';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int _tmain(int argc, _TCHAR* argv[])
|
int _tmain(int argc, _TCHAR* argv[])
|
||||||
|
@ -175,7 +175,7 @@ int _tmain(int argc, _TCHAR* argv[])
|
||||||
if(numThreads > 1 && bSaveLog) {
|
if(numThreads > 1 && bSaveLog) {
|
||||||
bSaveLog = false;
|
bSaveLog = false;
|
||||||
fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
|
fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
|
||||||
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
|
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(fileArg == argc) {
|
if(fileArg == argc) {
|
||||||
|
@ -183,16 +183,16 @@ int _tmain(int argc, _TCHAR* argv[])
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
char basename[256];
|
char basename[256];
|
||||||
ExtractBasename(argv[fileArg], basename, 256);
|
ExtractBasename(argv[fileArg], basename, 256);
|
||||||
|
|
||||||
ImageFile file (argv[fileArg]);
|
ImageFile file (argv[fileArg]);
|
||||||
if(!file.Load()) {
|
if(!file.Load()) {
|
||||||
fprintf(stderr, "Error loading file: %s\n", argv[fileArg]);
|
fprintf(stderr, "Error loading file: %s\n", argv[fileArg]);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Image *img = file.GetImage();
|
const Image *img = file.GetImage();
|
||||||
|
|
||||||
int numBlocks = (img->GetWidth() * img->GetHeight())/16;
|
int numBlocks = (img->GetWidth() * img->GetHeight())/16;
|
||||||
BlockStatManager *statManager = NULL;
|
BlockStatManager *statManager = NULL;
|
||||||
|
@ -224,14 +224,14 @@ int _tmain(int argc, _TCHAR* argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
if(bSaveLog) {
|
if(bSaveLog) {
|
||||||
strcat_s(basename, ".log");
|
strcat_s(basename, ".log");
|
||||||
statManager->ToFile(basename);
|
statManager->ToFile(basename);
|
||||||
basename[strlen(basename) - 4] = '\0';
|
basename[strlen(basename) - 4] = '\0';
|
||||||
}
|
}
|
||||||
strcat_s(basename, "-bc7.png");
|
strcat_s(basename, "-bc7.png");
|
||||||
Image cImg (*ci);
|
Image cImg (*ci);
|
||||||
ImageFile cImgFile (basename, eFileFormat_PNG, cImg);
|
ImageFile cImgFile (basename, eFileFormat_PNG, cImg);
|
||||||
cImgFile.Write();
|
cImgFile.Write();
|
||||||
|
|
||||||
// Cleanup
|
// Cleanup
|
||||||
delete ci;
|
delete ci;
|
||||||
|
|
|
@ -54,7 +54,7 @@ class ImageLoader;
|
||||||
class Image {
|
class Image {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Image(const CompressedImage &);
|
Image(const CompressedImage &);
|
||||||
Image(const ImageLoader &);
|
Image(const ImageLoader &);
|
||||||
~Image();
|
~Image();
|
||||||
|
|
||||||
|
|
|
@ -165,8 +165,8 @@ BlockStatManager::~BlockStatManager() {
|
||||||
|
|
||||||
if(m_Mutex)
|
if(m_Mutex)
|
||||||
{
|
{
|
||||||
delete m_Mutex;
|
delete m_Mutex;
|
||||||
m_Mutex = 0;
|
m_Mutex = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -206,15 +206,15 @@ void BlockStatManager::ToFile(const CHAR *filename) {
|
||||||
|
|
||||||
CHAR str[256];
|
CHAR str[256];
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
_sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr);
|
_sntprintf_s(str, 256, _TRUNCATE, "%d,%s\n", i, statStr);
|
||||||
#else
|
#else
|
||||||
snprintf(str, 256, "%d,%s\n", i, statStr);
|
snprintf(str, 256, "%d,%s\n", i, statStr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint32 strLen = uint32(strlen(str));
|
uint32 strLen = uint32(strlen(str));
|
||||||
if(strLen > 255) {
|
if(strLen > 255) {
|
||||||
str[255] = '\n';
|
str[255] = '\n';
|
||||||
strLen = 256;
|
strLen = 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
fstr.Write((uint8 *)str, strLen);
|
fstr.Write((uint8 *)str, strLen);
|
||||||
|
|
|
@ -75,11 +75,11 @@ CompressedImage::CompressedImage(
|
||||||
const ECompressionFormat format,
|
const ECompressionFormat format,
|
||||||
const unsigned char *data
|
const unsigned char *data
|
||||||
)
|
)
|
||||||
: m_Width(width)
|
: m_Width(width)
|
||||||
, m_Height(height)
|
, m_Height(height)
|
||||||
, m_Format(format)
|
, m_Format(format)
|
||||||
, m_Data(0)
|
, m_Data(0)
|
||||||
, m_DataSz(0)
|
, m_DataSz(0)
|
||||||
{
|
{
|
||||||
InitData(data);
|
InitData(data);
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,14 +95,14 @@ Image::Image(const CompressedImage &ci)
|
||||||
: m_Width(ci.GetWidth())
|
: m_Width(ci.GetWidth())
|
||||||
, m_Height(ci.GetHeight())
|
, m_Height(ci.GetHeight())
|
||||||
{
|
{
|
||||||
unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4;
|
unsigned int bufSz = ci.GetWidth() * ci.GetHeight() * 4;
|
||||||
m_PixelData = new uint8[ bufSz ];
|
m_PixelData = new uint8[ bufSz ];
|
||||||
if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; }
|
if(!m_PixelData) { fprintf(stderr, "%s\n", "Out of memory!"); return; }
|
||||||
|
|
||||||
if(!ci.DecompressImage(m_PixelData, bufSz)) {
|
if(!ci.DecompressImage(m_PixelData, bufSz)) {
|
||||||
fprintf(stderr, "Error decompressing image!\n");
|
fprintf(stderr, "Error decompressing image!\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Image::Image(const ImageLoader &loader)
|
Image::Image(const ImageLoader &loader)
|
||||||
|
|
|
@ -74,23 +74,23 @@ class StopWatchImpl;
|
||||||
class StopWatch
|
class StopWatch
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
StopWatch();
|
StopWatch();
|
||||||
StopWatch(const StopWatch &);
|
StopWatch(const StopWatch &);
|
||||||
|
|
||||||
~StopWatch();
|
~StopWatch();
|
||||||
|
|
||||||
StopWatch &operator=(const StopWatch &);
|
StopWatch &operator=(const StopWatch &);
|
||||||
|
|
||||||
void Start();
|
void Start();
|
||||||
void Stop();
|
void Stop();
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
double TimeInSeconds() const;
|
double TimeInSeconds() const;
|
||||||
double TimeInMilliseconds() const;
|
double TimeInMilliseconds() const;
|
||||||
double TimeInMicroseconds() const;
|
double TimeInMicroseconds() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
StopWatchImpl *impl;
|
StopWatchImpl *impl;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __TEXCOMP_STOP_WATCH_H__
|
#endif // __TEXCOMP_STOP_WATCH_H__
|
||||||
|
|
|
@ -404,5 +404,5 @@ bool CompressImageData(
|
||||||
}
|
}
|
||||||
|
|
||||||
void YieldThread() {
|
void YieldThread() {
|
||||||
TCThread::Yield();
|
TCThread::Yield();
|
||||||
}
|
}
|
||||||
|
|
|
@ -115,7 +115,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
|
||||||
, m_CompressedBlockSize(
|
, m_CompressedBlockSize(
|
||||||
(func == BC7C::Compress
|
(func == BC7C::Compress
|
||||||
#ifdef HAS_SSE_41
|
#ifdef HAS_SSE_41
|
||||||
|| func == BC7C::CompressImageBC7SIMD
|
|| func == BC7C::CompressImageBC7SIMD
|
||||||
#endif
|
#endif
|
||||||
)?
|
)?
|
||||||
16
|
16
|
||||||
|
@ -125,7 +125,7 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
|
||||||
, m_UncompressedBlockSize(
|
, m_UncompressedBlockSize(
|
||||||
(func == BC7C::Compress
|
(func == BC7C::Compress
|
||||||
#ifdef HAS_SSE_41
|
#ifdef HAS_SSE_41
|
||||||
|| func == BC7C::CompressImageBC7SIMD
|
|| func == BC7C::CompressImageBC7SIMD
|
||||||
#endif
|
#endif
|
||||||
)?
|
)?
|
||||||
64
|
64
|
||||||
|
|
|
@ -81,40 +81,39 @@ void WorkerThread::operator()() {
|
||||||
bool quitFlag = false;
|
bool quitFlag = false;
|
||||||
while(!quitFlag) {
|
while(!quitFlag) {
|
||||||
|
|
||||||
switch(m_Parent->AcceptThreadData(m_ThreadIdx))
|
switch(m_Parent->AcceptThreadData(m_ThreadIdx)) {
|
||||||
{
|
|
||||||
|
|
||||||
case eAction_Quit:
|
case eAction_Quit:
|
||||||
{
|
{
|
||||||
quitFlag = true;
|
quitFlag = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case eAction_Wait:
|
case eAction_Wait:
|
||||||
{
|
{
|
||||||
TCThread::Yield();
|
TCThread::Yield();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case eAction_DoWork:
|
case eAction_DoWork:
|
||||||
{
|
{
|
||||||
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
|
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
|
||||||
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
|
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
|
||||||
|
|
||||||
CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
|
CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
|
||||||
if(f)
|
if(f)
|
||||||
(*f)(cj);
|
(*f)(cj);
|
||||||
else
|
else
|
||||||
(*fStat)(cj, *statManager);
|
(*fStat)(cj, *statManager);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Unrecognized thread command!\n");
|
fprintf(stderr, "Unrecognized thread command!\n");
|
||||||
quitFlag = true;
|
quitFlag = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -244,10 +243,10 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
|
||||||
if(m_NextBlock == totalBlocks) {
|
if(m_NextBlock == totalBlocks) {
|
||||||
if(m_NumCompressions < m_TotalNumCompressions) {
|
if(m_NumCompressions < m_TotalNumCompressions) {
|
||||||
if(++m_WaitingThreads == m_ActiveThreads) {
|
if(++m_WaitingThreads == m_ActiveThreads) {
|
||||||
m_NextBlock = 0;
|
m_NextBlock = 0;
|
||||||
m_WaitingThreads = 0;
|
m_WaitingThreads = 0;
|
||||||
} else {
|
} else {
|
||||||
return WorkerThread::eAction_Wait;
|
return WorkerThread::eAction_Wait;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -1,3 +1,55 @@
|
||||||
|
/* FasTC
|
||||||
|
* Copyright (c) 2012 University of North Carolina at Chapel Hill.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission to use, copy, modify, and distribute this software and its
|
||||||
|
* documentation for educational, research, and non-profit purposes, without
|
||||||
|
* fee, and without a written agreement is hereby granted, provided that the
|
||||||
|
* above copyright notice, this paragraph, and the following four paragraphs
|
||||||
|
* appear in all copies.
|
||||||
|
*
|
||||||
|
* Permission to incorporate this software into commercial products may be
|
||||||
|
* obtained by contacting the authors or the Office of Technology Development
|
||||||
|
* at the University of North Carolina at Chapel Hill <otd@unc.edu>.
|
||||||
|
*
|
||||||
|
* This software program and documentation are copyrighted by the University of
|
||||||
|
* North Carolina at Chapel Hill. The software program and documentation are
|
||||||
|
* supplied "as is," without any accompanying services from the University of
|
||||||
|
* North Carolina at Chapel Hill or the authors. The University of North
|
||||||
|
* Carolina at Chapel Hill and the authors do not warrant that the operation of
|
||||||
|
* the program will be uninterrupted or error-free. The end-user understands
|
||||||
|
* that the program was developed for research purposes and is advised not to
|
||||||
|
* rely exclusively on the program for any reason.
|
||||||
|
*
|
||||||
|
* IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
|
||||||
|
* AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
|
||||||
|
* OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
||||||
|
* THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
|
||||||
|
* AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*
|
||||||
|
* THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
|
||||||
|
* DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY
|
||||||
|
* STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
|
||||||
|
* AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
|
||||||
|
* THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
|
||||||
|
* ENHANCEMENTS, OR MODIFICATIONS.
|
||||||
|
*
|
||||||
|
* Please send all BUG REPORTS to <pavel@cs.unc.edu>.
|
||||||
|
*
|
||||||
|
* The authors may be contacted via:
|
||||||
|
*
|
||||||
|
* Pavel Krajcevski
|
||||||
|
* Dept of Computer Science
|
||||||
|
* 201 S Columbia St
|
||||||
|
* Frederick P. Brooks, Jr. Computer Science Bldg
|
||||||
|
* Chapel Hill, NC 27599-3175
|
||||||
|
* USA
|
||||||
|
*
|
||||||
|
* <http://gamma.cs.unc.edu/FasTC/>
|
||||||
|
*/
|
||||||
|
|
||||||
#include "FileStream.h"
|
#include "FileStream.h"
|
||||||
|
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
|
@ -54,7 +106,7 @@ public:
|
||||||
: m_ReferenceCount(1)
|
: m_ReferenceCount(1)
|
||||||
{
|
{
|
||||||
|
|
||||||
DWORD dwDesiredAccess = GENERIC_READ;
|
DWORD dwDesiredAccess = GENERIC_READ;
|
||||||
DWORD dwOpenAction = OPEN_EXISTING;
|
DWORD dwOpenAction = OPEN_EXISTING;
|
||||||
switch(mode) {
|
switch(mode) {
|
||||||
default:
|
default:
|
||||||
|
@ -71,13 +123,13 @@ public:
|
||||||
|
|
||||||
case eFileMode_WriteAppend:
|
case eFileMode_WriteAppend:
|
||||||
case eFileMode_WriteBinaryAppend:
|
case eFileMode_WriteBinaryAppend:
|
||||||
dwDesiredAccess = FILE_APPEND_DATA;
|
dwDesiredAccess = FILE_APPEND_DATA;
|
||||||
dwOpenAction = CREATE_NEW;
|
dwOpenAction = CREATE_NEW;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_Handle = CreateFile(filename, dwDesiredAccess, 0, NULL, dwOpenAction, FILE_ATTRIBUTE_NORMAL, NULL);
|
m_Handle = CreateFile(filename, dwDesiredAccess, 0, NULL, dwOpenAction, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||||
if(m_Handle == INVALID_HANDLE_VALUE) {
|
if(m_Handle == INVALID_HANDLE_VALUE) {
|
||||||
ErrorExit(TEXT("CreateFile"));
|
ErrorExit(TEXT("CreateFile"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -145,15 +197,14 @@ FileStream::~FileStream() {
|
||||||
|
|
||||||
int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
|
int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
|
||||||
|
|
||||||
if(
|
if(m_Mode == eFileMode_Write ||
|
||||||
m_Mode == eFileMode_Write ||
|
|
||||||
m_Mode == eFileMode_WriteBinary ||
|
m_Mode == eFileMode_WriteBinary ||
|
||||||
m_Mode == eFileMode_WriteAppend ||
|
m_Mode == eFileMode_WriteAppend ||
|
||||||
m_Mode == eFileMode_WriteBinaryAppend
|
m_Mode == eFileMode_WriteBinaryAppend
|
||||||
) {
|
) {
|
||||||
CHAR errStr[256];
|
CHAR errStr[256];
|
||||||
_sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename);
|
_sntprintf_s(errStr, 256, "Cannot read from file '%s': File opened for reading.", m_Filename);
|
||||||
OutputDebugString(errStr);
|
OutputDebugString(errStr);
|
||||||
return -2;
|
return -2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -163,27 +214,27 @@ int32 FileStream::Read(uint8 *buf, uint32 bufSz) {
|
||||||
|
|
||||||
DWORD oldPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
|
DWORD oldPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
|
||||||
if(INVALID_SET_FILE_POINTER == oldPosition) {
|
if(INVALID_SET_FILE_POINTER == oldPosition) {
|
||||||
CHAR errStr[256];
|
CHAR errStr[256];
|
||||||
_sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError());
|
_sntprintf_s(errStr, 256, "Error querying the file position before reading from file '%s'(0x%x).", m_Filename, GetLastError());
|
||||||
OutputDebugString(errStr);
|
OutputDebugString(errStr);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
DWORD amtRead;
|
DWORD amtRead;
|
||||||
BOOL success = ReadFile(fp, buf, bufSz, &amtRead, NULL);
|
BOOL success = ReadFile(fp, buf, bufSz, &amtRead, NULL);
|
||||||
if(!success) {
|
if(!success) {
|
||||||
CHAR errStr[256];
|
CHAR errStr[256];
|
||||||
_sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename);
|
_sntprintf_s(errStr, 256, "Error reading from file '%s'.", m_Filename);
|
||||||
OutputDebugString(errStr);
|
OutputDebugString(errStr);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
DWORD newPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
|
DWORD newPosition = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
|
||||||
if(INVALID_SET_FILE_POINTER == newPosition) {
|
if(INVALID_SET_FILE_POINTER == newPosition) {
|
||||||
CHAR errStr[256];
|
CHAR errStr[256];
|
||||||
_sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError());
|
_sntprintf_s(errStr, 256, "Error querying the file position after reading from file '%s'(0x%x).", m_Filename, GetLastError());
|
||||||
OutputDebugString(errStr);
|
OutputDebugString(errStr);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return newPosition - oldPosition;
|
return newPosition - oldPosition;
|
||||||
|
@ -194,9 +245,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
|
||||||
m_Mode == eFileMode_Read ||
|
m_Mode == eFileMode_Read ||
|
||||||
m_Mode == eFileMode_ReadBinary
|
m_Mode == eFileMode_ReadBinary
|
||||||
) {
|
) {
|
||||||
CHAR errStr[256];
|
CHAR errStr[256];
|
||||||
_sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename);
|
_sntprintf_s(errStr, 256, "Cannot write to file '%s': File opened for writing.", m_Filename);
|
||||||
OutputDebugString(errStr);
|
OutputDebugString(errStr);
|
||||||
return -2;
|
return -2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -213,10 +264,10 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if(INVALID_SET_FILE_POINTER == dwPos) {
|
if(INVALID_SET_FILE_POINTER == dwPos) {
|
||||||
CHAR errStr[256];
|
CHAR errStr[256];
|
||||||
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
|
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
|
||||||
OutputDebugString(errStr);
|
OutputDebugString(errStr);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
while(!LockFile(fp, dwPos, 0, bufSz, 0)) Sleep(1);
|
while(!LockFile(fp, dwPos, 0, bufSz, 0)) Sleep(1);
|
||||||
|
@ -227,9 +278,9 @@ int32 FileStream::Write(const uint8 *buf, uint32 bufSz) {
|
||||||
UnlockFile(fp, dwPos, 0, bufSz, 0);
|
UnlockFile(fp, dwPos, 0, bufSz, 0);
|
||||||
|
|
||||||
if(!success) {
|
if(!success) {
|
||||||
CHAR errStr[256];
|
CHAR errStr[256];
|
||||||
_sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename);
|
_sntprintf_s(errStr, 256, "Error writing to file '%s'.", m_Filename);
|
||||||
OutputDebugString(errStr);
|
OutputDebugString(errStr);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -244,10 +295,10 @@ int32 FileStream::Tell() {
|
||||||
|
|
||||||
DWORD pos = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
|
DWORD pos = SetFilePointer(fp, 0, NULL, FILE_CURRENT);
|
||||||
if(INVALID_SET_FILE_POINTER == pos) {
|
if(INVALID_SET_FILE_POINTER == pos) {
|
||||||
CHAR errStr[256];
|
CHAR errStr[256];
|
||||||
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
|
_sntprintf_s(errStr, 256, "Error querying the file position before reading to file '%s'(0x%x).", m_Filename, GetLastError());
|
||||||
OutputDebugString(errStr);
|
OutputDebugString(errStr);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return pos;
|
return pos;
|
||||||
|
@ -264,17 +315,17 @@ bool FileStream::Seek(uint32 offset, ESeekPosition pos) {
|
||||||
|
|
||||||
DWORD origin = FILE_BEGIN;
|
DWORD origin = FILE_BEGIN;
|
||||||
switch(pos) {
|
switch(pos) {
|
||||||
default:
|
default:
|
||||||
case eSeekPosition_Beginning:
|
case eSeekPosition_Beginning:
|
||||||
// Do nothing
|
// Do nothing
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case eSeekPosition_Current:
|
case eSeekPosition_Current:
|
||||||
origin = FILE_CURRENT;
|
origin = FILE_CURRENT;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case eSeekPosition_End:
|
case eSeekPosition_End:
|
||||||
origin = FILE_END;
|
origin = FILE_END;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -172,54 +172,54 @@ bool ImageLoader::LoadImage() {
|
||||||
|
|
||||||
// For each block, visit the pixels in sequential order
|
// For each block, visit the pixels in sequential order
|
||||||
for(uint32 y = i; y < i+4; y++) {
|
for(uint32 y = i; y < i+4; y++) {
|
||||||
for(uint32 x = j; x < j+4; x++) {
|
for(uint32 x = j; x < j+4; x++) {
|
||||||
|
|
||||||
if(y >= m_Height || x >= m_Width) {
|
if(y >= m_Height || x >= m_Width) {
|
||||||
m_PixelData[byteIdx++] = 0; // r
|
m_PixelData[byteIdx++] = 0; // r
|
||||||
m_PixelData[byteIdx++] = 0; // g
|
m_PixelData[byteIdx++] = 0; // g
|
||||||
m_PixelData[byteIdx++] = 0; // b
|
m_PixelData[byteIdx++] = 0; // b
|
||||||
m_PixelData[byteIdx++] = 0; // a
|
m_PixelData[byteIdx++] = 0; // a
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int redVal = GetChannelForPixel(x, y, 0);
|
unsigned int redVal = GetChannelForPixel(x, y, 0);
|
||||||
if(redVal == INT_MAX)
|
if(redVal == INT_MAX)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
unsigned int greenVal = redVal;
|
unsigned int greenVal = redVal;
|
||||||
unsigned int blueVal = redVal;
|
unsigned int blueVal = redVal;
|
||||||
|
|
||||||
if(GetGreenChannelPrecision() > 0) {
|
if(GetGreenChannelPrecision() > 0) {
|
||||||
greenVal = GetChannelForPixel(x, y, 1);
|
greenVal = GetChannelForPixel(x, y, 1);
|
||||||
if(greenVal == INT_MAX)
|
if(greenVal == INT_MAX)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(GetBlueChannelPrecision() > 0) {
|
if(GetBlueChannelPrecision() > 0) {
|
||||||
blueVal = GetChannelForPixel(x, y, 2);
|
blueVal = GetChannelForPixel(x, y, 2);
|
||||||
if(blueVal == INT_MAX)
|
if(blueVal == INT_MAX)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int alphaVal = 0xFF;
|
unsigned int alphaVal = 0xFF;
|
||||||
if(GetAlphaChannelPrecision() > 0) {
|
if(GetAlphaChannelPrecision() > 0) {
|
||||||
alphaVal = GetChannelForPixel(x, y, 3);
|
alphaVal = GetChannelForPixel(x, y, 3);
|
||||||
if(alphaVal == INT_MAX)
|
if(alphaVal == INT_MAX)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Red channel
|
// Red channel
|
||||||
m_PixelData[byteIdx++] = redVal & 0xFF;
|
m_PixelData[byteIdx++] = redVal & 0xFF;
|
||||||
|
|
||||||
// Green channel
|
// Green channel
|
||||||
m_PixelData[byteIdx++] = greenVal & 0xFF;
|
m_PixelData[byteIdx++] = greenVal & 0xFF;
|
||||||
|
|
||||||
// Blue channel
|
// Blue channel
|
||||||
m_PixelData[byteIdx++] = blueVal & 0xFF;
|
m_PixelData[byteIdx++] = blueVal & 0xFF;
|
||||||
|
|
||||||
// Alpha channel
|
// Alpha channel
|
||||||
m_PixelData[byteIdx++] = alphaVal & 0xFF;
|
m_PixelData[byteIdx++] = alphaVal & 0xFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,10 +54,8 @@ static void ReportError(const char *msg) {
|
||||||
|
|
||||||
class PNGStreamReader {
|
class PNGStreamReader {
|
||||||
public:
|
public:
|
||||||
static void ReadDataFromStream(
|
static void ReadDataFromStream(png_structp png_ptr,
|
||||||
png_structp png_ptr,
|
png_bytep outBytes, png_size_t byteCountToRead
|
||||||
png_bytep outBytes,
|
|
||||||
png_size_t byteCountToRead
|
|
||||||
) {
|
) {
|
||||||
png_voidp io_ptr = png_get_io_ptr( png_ptr );
|
png_voidp io_ptr = png_get_io_ptr( png_ptr );
|
||||||
if( io_ptr == NULL ) {
|
if( io_ptr == NULL ) {
|
||||||
|
@ -120,9 +118,9 @@ bool ImageLoaderPNG::ReadData() {
|
||||||
int colorType = -1;
|
int colorType = -1;
|
||||||
|
|
||||||
if( 1 != png_get_IHDR(png_ptr, info_ptr,
|
if( 1 != png_get_IHDR(png_ptr, info_ptr,
|
||||||
(png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height),
|
(png_uint_32 *)(&m_Width), (png_uint_32 *)(&m_Height),
|
||||||
&bitDepth, &colorType,
|
&bitDepth, &colorType,
|
||||||
NULL, NULL, NULL)
|
NULL, NULL, NULL)
|
||||||
) {
|
) {
|
||||||
ReportError("Could not read PNG header");
|
ReportError("Could not read PNG header");
|
||||||
png_destroy_read_struct(&png_ptr, NULL, NULL);
|
png_destroy_read_struct(&png_ptr, NULL, NULL);
|
||||||
|
@ -140,33 +138,33 @@ bool ImageLoaderPNG::ReadData() {
|
||||||
png_bytep rowData = new png_byte[bpr];
|
png_bytep rowData = new png_byte[bpr];
|
||||||
|
|
||||||
switch(colorType) {
|
switch(colorType) {
|
||||||
default:
|
default:
|
||||||
case PNG_COLOR_TYPE_PALETTE:
|
case PNG_COLOR_TYPE_PALETTE:
|
||||||
ReportError("PNG color type unsupported");
|
ReportError("PNG color type unsupported");
|
||||||
png_destroy_read_struct(&png_ptr, NULL, NULL);
|
png_destroy_read_struct(&png_ptr, NULL, NULL);
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
case PNG_COLOR_TYPE_GRAY: {
|
case PNG_COLOR_TYPE_GRAY: {
|
||||||
m_RedChannelPrecision = bitDepth;
|
m_RedChannelPrecision = bitDepth;
|
||||||
m_RedData = new unsigned char[numPixels];
|
m_RedData = new unsigned char[numPixels];
|
||||||
|
|
||||||
for(uint32 i = 0; i < m_Height; i++) {
|
for(uint32 i = 0; i < m_Height; i++) {
|
||||||
|
|
||||||
png_read_row(png_ptr, rowData, NULL);
|
png_read_row(png_ptr, rowData, NULL);
|
||||||
|
|
||||||
unsigned int rowOffset = i * m_Width;
|
unsigned int rowOffset = i * m_Width;
|
||||||
|
|
||||||
unsigned int byteIdx = 0;
|
unsigned int byteIdx = 0;
|
||||||
for(uint32 j = 0; j < m_Width; j++) {
|
for(uint32 j = 0; j < m_Width; j++) {
|
||||||
m_RedData[rowOffset + j] = rowData[byteIdx++];
|
m_RedData[rowOffset + j] = rowData[byteIdx++];
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(byteIdx == bpr);
|
assert(byteIdx == bpr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PNG_COLOR_TYPE_RGB:
|
case PNG_COLOR_TYPE_RGB:
|
||||||
m_RedChannelPrecision = bitDepth;
|
m_RedChannelPrecision = bitDepth;
|
||||||
m_RedData = new unsigned char[numPixels];
|
m_RedData = new unsigned char[numPixels];
|
||||||
m_GreenChannelPrecision = bitDepth;
|
m_GreenChannelPrecision = bitDepth;
|
||||||
|
@ -176,22 +174,22 @@ bool ImageLoaderPNG::ReadData() {
|
||||||
|
|
||||||
for(uint32 i = 0; i < m_Height; i++) {
|
for(uint32 i = 0; i < m_Height; i++) {
|
||||||
|
|
||||||
png_read_row(png_ptr, rowData, NULL);
|
png_read_row(png_ptr, rowData, NULL);
|
||||||
|
|
||||||
unsigned int rowOffset = i * m_Width;
|
unsigned int rowOffset = i * m_Width;
|
||||||
|
|
||||||
unsigned int byteIdx = 0;
|
unsigned int byteIdx = 0;
|
||||||
for(uint32 j = 0; j < m_Width; j++) {
|
for(uint32 j = 0; j < m_Width; j++) {
|
||||||
m_RedData[rowOffset + j] = rowData[byteIdx++];
|
m_RedData[rowOffset + j] = rowData[byteIdx++];
|
||||||
m_GreenData[rowOffset + j] = rowData[byteIdx++];
|
m_GreenData[rowOffset + j] = rowData[byteIdx++];
|
||||||
m_BlueData[rowOffset + j] = rowData[byteIdx++];
|
m_BlueData[rowOffset + j] = rowData[byteIdx++];
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(byteIdx == bpr);
|
assert(byteIdx == bpr);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PNG_COLOR_TYPE_RGB_ALPHA:
|
case PNG_COLOR_TYPE_RGB_ALPHA:
|
||||||
m_RedChannelPrecision = bitDepth;
|
m_RedChannelPrecision = bitDepth;
|
||||||
m_RedData = new unsigned char[numPixels];
|
m_RedData = new unsigned char[numPixels];
|
||||||
m_GreenChannelPrecision = bitDepth;
|
m_GreenChannelPrecision = bitDepth;
|
||||||
|
@ -203,23 +201,23 @@ bool ImageLoaderPNG::ReadData() {
|
||||||
|
|
||||||
for(uint32 i = 0; i < m_Height; i++) {
|
for(uint32 i = 0; i < m_Height; i++) {
|
||||||
|
|
||||||
png_read_row(png_ptr, rowData, NULL);
|
png_read_row(png_ptr, rowData, NULL);
|
||||||
|
|
||||||
unsigned int rowOffset = i * m_Width;
|
unsigned int rowOffset = i * m_Width;
|
||||||
|
|
||||||
unsigned int byteIdx = 0;
|
unsigned int byteIdx = 0;
|
||||||
for(uint32 j = 0; j < m_Width; j++) {
|
for(uint32 j = 0; j < m_Width; j++) {
|
||||||
m_RedData[rowOffset + j] = rowData[byteIdx++];
|
m_RedData[rowOffset + j] = rowData[byteIdx++];
|
||||||
m_GreenData[rowOffset + j] = rowData[byteIdx++];
|
m_GreenData[rowOffset + j] = rowData[byteIdx++];
|
||||||
m_BlueData[rowOffset + j] = rowData[byteIdx++];
|
m_BlueData[rowOffset + j] = rowData[byteIdx++];
|
||||||
m_AlphaData[rowOffset + j] = rowData[byteIdx++];
|
m_AlphaData[rowOffset + j] = rowData[byteIdx++];
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(byteIdx == bpr);
|
assert(byteIdx == bpr);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PNG_COLOR_TYPE_GRAY_ALPHA:
|
case PNG_COLOR_TYPE_GRAY_ALPHA:
|
||||||
m_RedChannelPrecision = bitDepth;
|
m_RedChannelPrecision = bitDepth;
|
||||||
m_RedData = new unsigned char[numPixels];
|
m_RedData = new unsigned char[numPixels];
|
||||||
m_AlphaChannelPrecision = bitDepth;
|
m_AlphaChannelPrecision = bitDepth;
|
||||||
|
@ -227,17 +225,17 @@ bool ImageLoaderPNG::ReadData() {
|
||||||
|
|
||||||
for(uint32 i = 0; i < m_Height; i++) {
|
for(uint32 i = 0; i < m_Height; i++) {
|
||||||
|
|
||||||
png_read_row(png_ptr, rowData, NULL);
|
png_read_row(png_ptr, rowData, NULL);
|
||||||
|
|
||||||
unsigned int rowOffset = i * m_Width;
|
unsigned int rowOffset = i * m_Width;
|
||||||
|
|
||||||
unsigned int byteIdx = 0;
|
unsigned int byteIdx = 0;
|
||||||
for(uint32 j = 0; j < m_Width; j++) {
|
for(uint32 j = 0; j < m_Width; j++) {
|
||||||
m_RedData[rowOffset + j] = rowData[byteIdx++];
|
m_RedData[rowOffset + j] = rowData[byteIdx++];
|
||||||
m_AlphaData[rowOffset + j] = rowData[byteIdx++];
|
m_AlphaData[rowOffset + j] = rowData[byteIdx++];
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(byteIdx == bpr);
|
assert(byteIdx == bpr);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,87 +66,87 @@ public:
|
||||||
|
|
||||||
ImageWriterPNG &writer = *(ImageWriterPNG *)(io_ptr);
|
ImageWriterPNG &writer = *(ImageWriterPNG *)(io_ptr);
|
||||||
|
|
||||||
while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) {
|
while(writer.m_StreamPosition + byteCountToWrite > writer.m_RawFileDataSz) {
|
||||||
uint8 *newData = new uint8[writer.m_RawFileDataSz << 1];
|
uint8 *newData = new uint8[writer.m_RawFileDataSz << 1];
|
||||||
memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz);
|
memcpy(newData, writer.m_RawFileData, writer.m_RawFileDataSz);
|
||||||
writer.m_RawFileDataSz <<= 1;
|
writer.m_RawFileDataSz <<= 1;
|
||||||
delete writer.m_RawFileData;
|
delete writer.m_RawFileData;
|
||||||
writer.m_RawFileData = newData;
|
writer.m_RawFileData = newData;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]);
|
unsigned char *stream = &(writer.m_RawFileData[writer.m_StreamPosition]);
|
||||||
memcpy(stream, outBytes, byteCountToWrite);
|
memcpy(stream, outBytes, byteCountToWrite);
|
||||||
|
|
||||||
writer.m_StreamPosition += byteCountToWrite;
|
writer.m_StreamPosition += byteCountToWrite;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FlushStream(png_structp png_ptr) { /* Do nothing... */ }
|
static void FlushStream(png_structp png_ptr) { /* Do nothing... */ }
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
ImageWriterPNG::ImageWriterPNG(const Image &im)
|
ImageWriterPNG::ImageWriterPNG(const Image &im)
|
||||||
: ImageWriter(im.GetWidth(), im.GetHeight(), im.RawData())
|
: ImageWriter(im.GetWidth(), im.GetHeight(), im.RawData())
|
||||||
, m_StreamPosition(0)
|
, m_StreamPosition(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ImageWriterPNG::WriteImage() {
|
bool ImageWriterPNG::WriteImage() {
|
||||||
|
|
||||||
png_structp png_ptr = NULL;
|
png_structp png_ptr = NULL;
|
||||||
png_infop info_ptr = NULL;
|
png_infop info_ptr = NULL;
|
||||||
png_byte ** row_pointers = NULL;
|
png_byte ** row_pointers = NULL;
|
||||||
int pixel_size = 4;
|
int pixel_size = 4;
|
||||||
int depth = 8;
|
int depth = 8;
|
||||||
|
|
||||||
png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
|
png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
|
||||||
if (png_ptr == NULL) {
|
if (png_ptr == NULL) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
info_ptr = png_create_info_struct (png_ptr);
|
info_ptr = png_create_info_struct (png_ptr);
|
||||||
if (info_ptr == NULL) {
|
if (info_ptr == NULL) {
|
||||||
png_destroy_write_struct (&png_ptr, &info_ptr);
|
png_destroy_write_struct (&png_ptr, &info_ptr);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set image attributes. */
|
/* Set image attributes. */
|
||||||
|
|
||||||
png_set_IHDR (png_ptr,
|
png_set_IHDR (png_ptr,
|
||||||
info_ptr,
|
info_ptr,
|
||||||
m_Width,
|
m_Width,
|
||||||
m_Height,
|
m_Height,
|
||||||
depth,
|
depth,
|
||||||
PNG_COLOR_TYPE_RGBA,
|
PNG_COLOR_TYPE_RGBA,
|
||||||
PNG_INTERLACE_NONE,
|
PNG_INTERLACE_NONE,
|
||||||
PNG_COMPRESSION_TYPE_DEFAULT,
|
PNG_COMPRESSION_TYPE_DEFAULT,
|
||||||
PNG_FILTER_TYPE_DEFAULT);
|
PNG_FILTER_TYPE_DEFAULT);
|
||||||
|
|
||||||
/* Initialize rows of PNG. */
|
/* Initialize rows of PNG. */
|
||||||
|
|
||||||
row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *));
|
row_pointers = (png_byte **)png_malloc (png_ptr, m_Height * sizeof (png_byte *));
|
||||||
for (uint32 y = 0; y < m_Height; ++y) {
|
for (uint32 y = 0; y < m_Height; ++y) {
|
||||||
png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size);
|
png_byte *row = (png_byte *)png_malloc (png_ptr, sizeof (uint8) * m_Width * pixel_size);
|
||||||
|
|
||||||
row_pointers[y] = row;
|
row_pointers[y] = row;
|
||||||
|
|
||||||
for (uint32 x = 0; x < m_Width; ++x) {
|
for (uint32 x = 0; x < m_Width; ++x) {
|
||||||
for(uint32 ch = 0; ch < 4; ch++) {
|
for(uint32 ch = 0; ch < 4; ch++) {
|
||||||
*row++ = GetChannelForPixel(x, y, ch);
|
*row++ = GetChannelForPixel(x, y, ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream);
|
png_set_write_fn(png_ptr, this, PNGStreamWriter::WriteDataToStream, PNGStreamWriter::FlushStream);
|
||||||
png_set_rows (png_ptr, info_ptr, row_pointers);
|
png_set_rows (png_ptr, info_ptr, row_pointers);
|
||||||
png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
|
png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
|
||||||
|
|
||||||
for (uint32 y = 0; y < m_Height; y++) {
|
for (uint32 y = 0; y < m_Height; y++) {
|
||||||
png_free (png_ptr, row_pointers[y]);
|
png_free (png_ptr, row_pointers[y]);
|
||||||
}
|
}
|
||||||
png_free (png_ptr, row_pointers);
|
png_free (png_ptr, row_pointers);
|
||||||
|
|
||||||
png_destroy_write_struct (&png_ptr, &info_ptr);
|
png_destroy_write_struct (&png_ptr, &info_ptr);
|
||||||
|
|
||||||
m_RawFileDataSz = m_StreamPosition;
|
m_RawFileDataSz = m_StreamPosition;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,8 +55,8 @@ class ImageWriterPNG : public ImageWriter {
|
||||||
|
|
||||||
virtual bool WriteImage();
|
virtual bool WriteImage();
|
||||||
private:
|
private:
|
||||||
uint32 m_StreamPosition;
|
uint32 m_StreamPosition;
|
||||||
friend class PNGStreamWriter;
|
friend class PNGStreamWriter;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // _IMAGE_LOADER_H_
|
#endif // _IMAGE_LOADER_H_
|
||||||
|
|
Loading…
Reference in a new issue