diff --git a/ASTCEncoder/src/Decompressor.cpp b/ASTCEncoder/src/Decompressor.cpp index a3fb636..40ffff8 100644 --- a/ASTCEncoder/src/Decompressor.cpp +++ b/ASTCEncoder/src/Decompressor.cpp @@ -66,6 +66,8 @@ #include "BitStream.h" using FasTC::BitStreamReadOnly; +#include "Pixel.h" + namespace ASTCC { struct TexelWeightParams { @@ -151,7 +153,7 @@ namespace ASTCC { // layout is in [7-9] if(modeBits & 0x80) { // layout is in [7-8] - assert(modeBits & 0x40 == 0); + assert((modeBits & 0x40) == 0U); if(modeBits & 0x20) { layout = 8; } else { @@ -554,6 +556,8 @@ namespace ASTCC { result = (A & 0x20) | (result >> 2); } + assert(result < 64); + // Change from [0,63] to [0,64] if(result > 32) { result += 1; @@ -571,18 +575,187 @@ namespace ASTCC { std::vector::const_iterator itr; for(itr = weights.begin(); itr != weights.end(); itr++) { unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); - assert(unquantized[0][weightIdx] <= 64); if(params.m_bDualPlane) { itr++; unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr); - assert(unquantized[1][weightIdx] <= 64); } weightIdx++; } - // Do infill if necessary... + // Do infill if necessary (Section C.2.18) ... + uint32 Ds = (1024 + (blockWidth/2)) / (blockWidth - 1); + uint32 Dt = (1024 + (blockHeight/2)) / (blockHeight - 1); + + for(uint32 plane = 0; plane < (params.m_bDualPlane? 2 : 1); plane++) + for(uint32 t = 0; t < blockHeight; t++) + for(uint32 s = 0; s < blockWidth; s++) { + uint32 cs = Ds * s; + uint32 ct = Dt * t; + + uint32 gs = (cs * (params.m_Width - 1) + 32) >> 6; + uint32 gt = (ct * (params.m_Height - 1) + 32) >> 6; + + uint32 js = gs >> 4; + uint32 fs = gs & 0xF; + + uint32 jt = gt >> 4; + uint32 ft = gt & 0x0F; + + uint32 v0 = js + jt * params.m_Width; + + assert(v0 < (params.m_Width * params.m_Height)); + uint32 p00 = unquantized[plane][v0]; + + assert((v0 + 1) < (params.m_Width * params.m_Height)); + uint32 p01 = unquantized[plane][v0 + 1]; + + assert((v0 + params.m_Width) < (params.m_Width * params.m_Height)); + uint32 p10 = unquantized[plane][v0 + params.m_Width]; + + assert((v0 + params.m_Width + 1) < (params.m_Width * params.m_Height)); + uint32 p11 = unquantized[plane][v0 + params.m_Width + 1]; + + uint32 w11 = (fs * ft + 8) >> 4; + uint32 w10 = ft - w11; + uint32 w01 = fs - w11; + uint32 w00 = 16 - fs - ft + w11; + + out[plane][t*blockWidth + s] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; + } + } + + // Section C.2.14 + void ComputeEndpoints(FasTC::Pixel &ep1, FasTC::Pixel &ep2, + const uint32* &colorValues, uint32 colorEndpointMode) { + #define READ_UINT_VALUES(N) \ + uint32 v[N]; \ + for(uint32 i = 0; i < N; i++) { \ + v[i] = *(colorValues++); \ + } + + #define READ_INT_VALUES(N) \ + int32 v[N]; \ + for(uint32 i = 0; i < N; i++) { \ + v[i] = static_cast(*(colorValues++)); \ + } + + switch(colorEndpointMode) { + case 0: { + READ_UINT_VALUES(2) + ep1 = FasTC::Pixel(0xFF, v[0], v[0], v[0]); + ep2 = FasTC::Pixel(0xFF, v[1], v[1], v[1]); + } + break; + + case 1: { + READ_UINT_VALUES(2) + uint32 L0 = (v[0] >> 2) | (v[1] & 0xC0); + uint32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); + ep1 = FasTC::Pixel(0xFF, L0, L0, L0); + ep2 = FasTC::Pixel(0xFF, L1, L1, L1); + } + break; + + case 4: { + READ_UINT_VALUES(4) + ep1 = FasTC::Pixel(v[2], v[0], v[0], v[0]); + ep2 = FasTC::Pixel(v[3], v[1], v[1], v[1]); + } + break; + + case 5: { + READ_INT_VALUES(4) + BitTransferSigned(v[1], v[0]); + BitTransferSigned(v[3], v[2]); + ep1 = FasTC::Pixel(v[2], v[0], v[0], v[0]); + ep2 = FasTC::Pixel(v[2]+v[3], v[0]+v[1], v[0]+v[1], v[0]+v[1]); + ep1.ClampByte(); + ep2.ClampByte(); + } + break; + + case 6: { + READ_UINT_VALUES(4) + ep1 = FasTC::Pixel(0xFF, v[0]*v[3] >> 8, v[1]*v[3] >> 8, v[2]*v[3] >> 8); + ep2 = FasTC::Pixel(0xFF, v[0], v[1], v[2]); + } + break; + + case 8: { + READ_UINT_VALUES(6) + if(v[1]+v[3]+v[5] >= v[0]+v[2]+v[4]) { + ep1 = FasTC::Pixel(0xFF, v[0], v[2], v[4]); + ep2 = FasTC::Pixel(0xFF, v[1], v[3], v[5]); + } else { + ep1 = BlueContract(0xFF, v[1], v[3], v[5]); + ep2 = BlueContract(0xFF, v[0], v[2], v[4]); + } + } + break; + + case 9: { + READ_INT_VALUES(6) + BitTransferSigned(v[1], v[0]); + BitTransferSigned(v[3], v[2]); + BitTransferSigned(v[5], v[4]); + if(v[1]+v[3]+v[5] >= 0) { + ep1 = FasTC::Pixel(0xFF, v[0], v[2], v[4]); + ep2 = FasTC::Pixel(0xFF, v[0]+v[1], v[2]+v[3], v[4]+v[5]); + } else { + ep1 = BlueContract(0xFF, v[0]+v[1], v[2]+v[3], v[4]+v[5]); + ep2 = BlueContract(0xFF, v[0], v[2], v[4]); + } + ep1.ClampByte(); + ep2.ClampByte(); + } + break; + + case 10: { + READ_UINT_VALUES(6) + ep1 = FasTC::Pixel(v[4], v[0]*v[3] >> 8, v[1]*v[3] >> 8, v[2]*v[3] >> 8); + ep2 = FasTC::Pixel(v[5], v[0], v[1], v[2]); + } + break; + + case 12: { + READ_UINT_VALUES(8) + if(v[1]+v[3]+v[5] >= v[0]+v[2]+v[4]) { + ep1 = FasTC::Pixel(v[6], v[0], v[2], v[4]); + ep2 = FasTC::Pixel(v[7], v[1], v[3], v[5]); + } else { + ep1 = BlueContract(v[7], v[1], v[3], v[5]); + ep2 = BlueContract(v[6], v[0], v[2], v[4]); + } + } + break; + + case 13: { + READ_INT_VALUES(8) + BitTransferSigned(v[1], v[0]); + BitTransferSigned(v[3], v[2]); + BitTransferSigned(v[5], v[4]); + BitTransferSigned(v[7], v[6]); + if(v[1]+v[3]+v[5] >= 0) { + ep1 = FasTC::Pixel(v[6], v[0], v[2], v[4]); + ep2 = FasTC::Pixel(v[7]+v[6], v[0]+v[1], v[2]+v[3], v[4]+v[5]); + } else { + ep1 = BlueContract(v[6]+v[7], v[0]+v[1], v[2]+v[3], v[4]+v[5]); + ep2 = BlueContract(v[6], v[0], v[2], v[4]); + } + ep1.ClampByte(); + ep2.ClampByte(); + } + break; + + default: + assert(!"Unsupported color endpoint mode (is it HDR?)"); + break; + } + + #undef READ_UINT_VALUES + #undef READ_INT_VALUES } void DecompressBlock(const uint8 inBuf[16], @@ -743,9 +916,48 @@ namespace ASTCC { weightParams.m_MaxWeight, weightParams.m_Width * weightParams.m_Height); + FasTC::Pixel endpoints[4][2]; + const uint32 *colorValuesPtr = colorValues; + for(uint32 i = 0; i < nPartitions; i++) { + ComputeEndpoints(endpoints[i][0], endpoints[i][1], + colorValuesPtr, colorEndpointMode[i]); + } + // Blocks can be at most 12x12, so we can have as many as 144 weights uint32 weights[2][144]; UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); + + // Now that we have endpoints and weights, we can interpolate and generate + // the proper decoding... + for(uint32 j = 0; j < blockHeight; j++) + for(uint32 i = 0; i < blockWidth; i++) { + uint32 partition = Select2DPartition( + partitionIndex, i, j, nPartitions, (blockHeight * blockWidth) < 32 + ); + if(nPartitions == 1) { + partition = 0; + } + assert(partition < nPartitions); + + FasTC::Pixel p; + for(uint32 c = 0; c < 4; c++) { + uint32 C0 = endpoints[partition][0].Component(c); + C0 = FasTC::Replicate(C0, 8, 16); + uint32 C1 = endpoints[partition][1].Component(c); + C1 = FasTC::Replicate(C1, 8, 16); + + uint32 plane = 0; + if(weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { + plane = 1; + } + + uint32 weight = weights[plane][j * blockWidth + i]; + uint32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; + p.Component(c) = C >> 8; + } + + outBuf[j * blockWidth + i] = p.Pack(); + } } void Decompress(const FasTC::DecompressionJob &dcj, EASTCBlockSize blockSize) { @@ -757,6 +969,7 @@ namespace ASTCC { const uint8 *blockPtr = dcj.InBuf() + blockIdx*16; + // Blocks can be at most 12x12 uint32 uncompData[144]; uint8 *dataPtr = reinterpret_cast(uncompData); DecompressBlock(blockPtr, blockWidth, blockHeight, dataPtr); diff --git a/ASTCEncoder/src/Utils.h b/ASTCEncoder/src/Utils.h index 22c7572..8abd507 100644 --- a/ASTCEncoder/src/Utils.h +++ b/ASTCEncoder/src/Utils.h @@ -56,6 +56,7 @@ #include "ASTCCompressor.h" #include "TexCompTypes.h" +#include "Pixel.h" namespace ASTCC { @@ -109,6 +110,107 @@ namespace ASTCC { } return c; } + + // Transfers a bit as described in C.2.14 + void BitTransferSigned(int32 &a, int32 &b) { + b >>= 1; + b |= a & 0x80; + a >>= 1; + a &= 0x3F; + if(a & 0x20) + a -= 0x40; + } + + // Adds more precision to the blue channel as described + // in C.2.14 + FasTC::Pixel BlueContract(int32 a, int32 r, int32 g, int32 b) { + return FasTC::Pixel(a, (r + b) >> 1, (g + b) >> 1, b); + } + + // Partition selection functions as specified in + // C.2.21 + uint32 hash52(uint32 p) { + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + return p; + } + + int32 SelectPartition(int32 seed, int32 x, int32 y, int32 z, + int32 partitionCount, int32 smallBlock) { + if(smallBlock) { + x <<= 1; + y <<= 1; + z <<= 1; + } + + seed += (partitionCount-1) * 1024; + + uint32 rnum = hash52(seed); + uint8 seed1 = rnum & 0xF; + uint8 seed2 = (rnum >> 4) & 0xF; + uint8 seed3 = (rnum >> 8) & 0xF; + uint8 seed4 = (rnum >> 12) & 0xF; + uint8 seed5 = (rnum >> 16) & 0xF; + uint8 seed6 = (rnum >> 20) & 0xF; + uint8 seed7 = (rnum >> 24) & 0xF; + uint8 seed8 = (rnum >> 28) & 0xF; + uint8 seed9 = (rnum >> 18) & 0xF; + uint8 seed10 = (rnum >> 22) & 0xF; + uint8 seed11 = (rnum >> 26) & 0xF; + uint8 seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; + + seed1 *= seed1; seed2 *= seed2; + seed3 *= seed3; seed4 *= seed4; + seed5 *= seed5; seed6 *= seed6; + seed7 *= seed7; seed8 *= seed8; + seed9 *= seed9; seed10 *= seed10; + seed11 *= seed11; seed12 *= seed12; + + int32 sh1, sh2, sh3; + if(seed & 1) { + sh1 = (seed & 2)? 4 : 5; + sh2 = (partitionCount == 3)? 6 : 5; + } else { + sh1 = (partitionCount == 3)? 6 : 5; + sh2 = (seed & 2)? 4 : 5; + } + sh3 = (seed & 0x10) ? sh1 : sh2; + + seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2; + seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2; + seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3; + + int32 a = seed1*x + seed2*y + seed11*z + (rnum >> 14); + int32 b = seed3*x + seed4*y + seed12*z + (rnum >> 10); + int32 c = seed5*x + seed6*y + seed9 *z + (rnum >> 6); + int32 d = seed7*x + seed8*y + seed10*z + (rnum >> 2); + + a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; + + if( partitionCount < 4 ) d = 0; + if( partitionCount < 3 ) c = 0; + + if( a >= b && a >= c && a >= d ) return 0; + else if( b >= c && b >= d ) return 1; + else if( c >= d ) return 2; + return 3; + } + + int32 Select2DPartition(int32 seed, int32 x, int32 y, + int32 partitionCount, int32 smallBlock) { + return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); + } + + int32 SelectSmall2DPartition(int32 seed, int32 x, int32 y, + int32 partitionCount) { + return Select2DPartition(seed, x, y, partitionCount, 1); + } + + int32 SelectLarge2DPartition(int32 seed, int32 x, int32 y, + int32 partitionCount) { + return Select2DPartition(seed, x, y, partitionCount, 0); + } } // namespace ASTCC #endif // ASTCENCODER_SRC_UTILS_H_