mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-23 05:31:02 +00:00
Refactor CompressionJob struct.
In order to better facilitate the change from block stream order to non-block stream order, a lot of changes were introduced to the way that we feed texture data to the compressors. This data is embodied in the CompressionJob struct. We have made it so that the compression job holds both the in and out pointers for our uncompressed and compressed data. Furthermore, we have made sure that the struct also contains the format that it's compressing for, so that if any threading program would like to chop up a compression job into smaller chunks based on the format, it doesn't need to know the format explicitly; it just needs to know certain properties of the format. Moreover, the user can now define the start and end pixels between which we would like to compress. We can compress subsets of data by changing the in and out pointers and the width and height values. The compressors will read data linearly until they reach the end pixels, based on the width of the given image.
This commit is contained in:
parent
f70b26a47f
commit
a80944901e
|
@ -114,13 +114,13 @@ namespace BC7C {
|
||||||
|
|
||||||
// Compress the image given as RGBA data to BC7 format. Width and Height are
|
// Compress the image given as RGBA data to BC7 format. Width and Height are
|
||||||
// the dimensions of the image in pixels.
|
// the dimensions of the image in pixels.
|
||||||
void Compress(const CompressionJob &);
|
void Compress(const FasTC::CompressionJob &);
|
||||||
|
|
||||||
// Perform a compression while recording all of the choices the compressor
|
// Perform a compression while recording all of the choices the compressor
|
||||||
// made into a list of statistics. We can use this to see whether or not
|
// made into a list of statistics. We can use this to see whether or not
|
||||||
// certain heuristics are working, such as whether or not certain modes are
|
// certain heuristics are working, such as whether or not certain modes are
|
||||||
// being chosen more often than others, etc.
|
// being chosen more often than others, etc.
|
||||||
void CompressWithStats(const CompressionJob &, std::ostream *logStream);
|
void CompressWithStats(const FasTC::CompressionJob &, std::ostream *logStream);
|
||||||
|
|
||||||
#ifdef HAS_SSE_41
|
#ifdef HAS_SSE_41
|
||||||
// Compress the image given as RGBA data to BC7 format using an algorithm
|
// Compress the image given as RGBA data to BC7 format using an algorithm
|
||||||
|
@ -135,12 +135,12 @@ namespace BC7C {
|
||||||
// to compress a list of textures. If this function is called with the same
|
// to compress a list of textures. If this function is called with the same
|
||||||
// argument from multiple threads, they will work together to compress all of
|
// argument from multiple threads, they will work together to compress all of
|
||||||
// the images in the list.
|
// the images in the list.
|
||||||
void CompressAtomic(CompressionJobList &);
|
void CompressAtomic(FasTC::CompressionJobList &);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height
|
// Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height
|
||||||
// are the dimensions of the image in pixels.
|
// are the dimensions of the image in pixels.
|
||||||
void Decompress(const DecompressionJob &);
|
void Decompress(const FasTC::DecompressionJob &);
|
||||||
} // namespace BC7C
|
} // namespace BC7C
|
||||||
|
|
||||||
#endif // BPTCENCODER_INCLUDE_BC7COMPRESSOR_H_
|
#endif // BPTCENCODER_INCLUDE_BC7COMPRESSOR_H_
|
||||||
|
|
|
@ -1622,23 +1622,32 @@ namespace BC7C {
|
||||||
|
|
||||||
static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
|
static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
|
||||||
|
|
||||||
|
void GetBlock(const uint32 x, const uint32 y, const uint32 pixelsWide,
|
||||||
|
const uint32 *inPixels, uint32 block[16]) {
|
||||||
|
memcpy(block, inPixels + y*pixelsWide + x, 4 * sizeof(uint32));
|
||||||
|
memcpy(block + 4, inPixels + (y+1)*pixelsWide + x, 4 * sizeof(uint32));
|
||||||
|
memcpy(block + 8, inPixels + (y+2)*pixelsWide + x, 4 * sizeof(uint32));
|
||||||
|
memcpy(block + 12, inPixels + (y+3)*pixelsWide + x, 4 * sizeof(uint32));
|
||||||
|
}
|
||||||
|
|
||||||
// Compress an image using BC7 compression. Use the inBuf parameter to point
|
// Compress an image using BC7 compression. Use the inBuf parameter to point
|
||||||
// to an image in 4-byte RGBA format. The width and height parameters specify
|
// to an image in 4-byte RGBA format. The width and height parameters specify
|
||||||
// the size of the image in pixels. The buffer pointed to by outBuf should be
|
// the size of the image in pixels. The buffer pointed to by outBuf should be
|
||||||
// large enough to store the compressed image. This implementation has a 4:1
|
// large enough to store the compressed image. This implementation has a 4:1
|
||||||
// compression ratio.
|
// compression ratio.
|
||||||
void Compress(const CompressionJob &cj) {
|
void Compress(const FasTC::CompressionJob &cj) {
|
||||||
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
|
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
|
||||||
unsigned char *outBuf = cj.OutBuf();
|
const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_BPTC);
|
||||||
for(uint32 j = 0; j < cj.Height(); j += 4) {
|
uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
|
||||||
for(uint32 i = 0; i < cj.Width(); i += 4) {
|
|
||||||
|
uint32 startX = cj.XStart();
|
||||||
|
bool done = false;
|
||||||
|
|
||||||
|
for(uint32 j = cj.YStart(); !done; j += 4) {
|
||||||
|
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
|
||||||
|
|
||||||
uint32 block[16];
|
uint32 block[16];
|
||||||
memcpy(block, inPixels + j*cj.RowBytes() + i, 4 * sizeof(uint32));
|
GetBlock(i, j, cj.Width(), inPixels, block);
|
||||||
memcpy(block + 4, inPixels + (j+1)*cj.RowBytes() + i, 4 * sizeof(uint32));
|
|
||||||
memcpy(block + 8, inPixels + (j+2)*cj.RowBytes() + i, 4 * sizeof(uint32));
|
|
||||||
memcpy(block + 12, inPixels + (j+3)*cj.RowBytes() + i, 4 * sizeof(uint32));
|
|
||||||
|
|
||||||
CompressBC7Block(block, outBuf);
|
CompressBC7Block(block, outBuf);
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
|
@ -1666,8 +1675,10 @@ namespace BC7C {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
outBuf += 16;
|
outBuf += kBlockSz;
|
||||||
|
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
|
||||||
}
|
}
|
||||||
|
startX = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1691,24 +1702,28 @@ namespace BC7C {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Variables used for synchronization in threadsafe implementation.
|
// Variables used for synchronization in threadsafe implementation.
|
||||||
void CompressAtomic(CompressionJobList &cjl) {
|
void CompressAtomic(FasTC::CompressionJobList &cjl) {
|
||||||
uint32 jobIdx;
|
uint32 jobIdx;
|
||||||
while((jobIdx = cjl.m_CurrentJobIndex) < cjl.GetNumJobs()) {
|
while((jobIdx = cjl.m_CurrentJobIndex) < cjl.GetNumJobs()) {
|
||||||
// !HACK! ... Microsoft has this defined
|
// !HACK! ... Microsoft has this defined
|
||||||
#undef GetJob
|
#undef GetJob
|
||||||
|
|
||||||
const CompressionJob *cj = cjl.GetJob(jobIdx);
|
const FasTC::CompressionJob *cj = cjl.GetJob(jobIdx);
|
||||||
const uint32 nBlocks = (cj->height * cj->width) / 16;
|
const uint32 nBlocks = (cj->Height() * cj->Width()) / 16;
|
||||||
|
|
||||||
// Help finish whatever texture we're compressing before we start again on
|
// Help finish whatever texture we're compressing before we start again on
|
||||||
// my work...
|
// my work...
|
||||||
uint32 blockIdx;
|
uint32 blockIdx;
|
||||||
while((blockIdx = FetchAndAdd(&(cjl.m_CurrentBlockIndex))) < nBlocks &&
|
while((blockIdx = FetchAndAdd(&(cjl.m_CurrentBlockIndex))) < nBlocks &&
|
||||||
*(cjl.GetFinishedFlag(jobIdx)) == 0) {
|
*(cjl.GetFinishedFlag(jobIdx)) == 0) {
|
||||||
unsigned char *out = cj->outBuf + (16 * blockIdx);
|
unsigned char *out = cj->OutBuf() + (16 * blockIdx);
|
||||||
const unsigned char *in = cj->inBuf + (64 * blockIdx);
|
|
||||||
|
|
||||||
CompressBC7Block((const uint32 *)in, out);
|
uint32 block[16];
|
||||||
|
uint32 x = cj->XStart() + 4 * (blockIdx % (cj->Width() / 4));
|
||||||
|
uint32 y = cj->YStart() + 4 * (blockIdx / (cj->Width() / 4));
|
||||||
|
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj->InBuf());
|
||||||
|
GetBlock(x, y, cj->Width(), inPixels, block);
|
||||||
|
CompressBC7Block(block, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(TestAndSet(cjl.GetFinishedFlag(jobIdx)) == 0) {
|
if(TestAndSet(cjl.GetFinishedFlag(jobIdx)) == 0) {
|
||||||
|
@ -1722,21 +1737,21 @@ namespace BC7C {
|
||||||
}
|
}
|
||||||
#endif // HAS_ATOMICS
|
#endif // HAS_ATOMICS
|
||||||
|
|
||||||
void CompressWithStats(const CompressionJob &cj, std::ostream *logStream) {
|
void CompressWithStats(const FasTC::CompressionJob &cj, std::ostream *logStream) {
|
||||||
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
|
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
|
||||||
unsigned char *outBuf = cj.OutBuf();
|
const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_BPTC);
|
||||||
|
uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
|
||||||
|
|
||||||
for(uint32 j = 0; j < cj.Height(); j += 4) {
|
uint32 startX = cj.XStart();
|
||||||
for(uint32 i = 0; i < cj.Width(); i += 4) {
|
bool done = false;
|
||||||
|
for(uint32 j = cj.YStart(); !done; j += 4) {
|
||||||
|
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
|
||||||
|
|
||||||
uint32 block[16];
|
uint32 block[16];
|
||||||
memcpy(block, inPixels + j*cj.RowBytes() + i, 4 * sizeof(uint32));
|
GetBlock(i, j, cj.Width(), inPixels, block);
|
||||||
memcpy(block + 4, inPixels + (j+1)*cj.RowBytes() + i, 4 * sizeof(uint32));
|
|
||||||
memcpy(block + 8, inPixels + (j+2)*cj.RowBytes() + i, 4 * sizeof(uint32));
|
|
||||||
memcpy(block + 12, inPixels + (j+3)*cj.RowBytes() + i, 4 * sizeof(uint32));
|
|
||||||
|
|
||||||
if(logStream) {
|
if(logStream) {
|
||||||
uint64 blockIdx = reinterpret_cast<uint64>(inPixels + j*cj.Width() + i);
|
uint64 blockIdx = cj.CoordsToBlockIdx(i, j);
|
||||||
CompressBC7Block(block, outBuf, BlockLogger(blockIdx, *logStream));
|
CompressBC7Block(block, outBuf, BlockLogger(blockIdx, *logStream));
|
||||||
} else {
|
} else {
|
||||||
CompressBC7Block(block, outBuf);
|
CompressBC7Block(block, outBuf);
|
||||||
|
@ -1749,9 +1764,9 @@ namespace BC7C {
|
||||||
DecompressBC7Block(cmpData, unComp);
|
DecompressBC7Block(cmpData, unComp);
|
||||||
const uint8* unCompData = reinterpret_cast<uint8 *>(unComp);
|
const uint8* unCompData = reinterpret_cast<uint8 *>(unComp);
|
||||||
|
|
||||||
int diffSum = 0;
|
uint32 diffSum = 0;
|
||||||
for(int i = 0; i < 64; i++) {
|
for(uint32 k = 0; k < 64; k++) {
|
||||||
diffSum += sad(unCompData[i], inBlock[i]);
|
diffSum += sad(unCompData[k], inBlock[k]);
|
||||||
}
|
}
|
||||||
double blockError = static_cast<double>(diffSum) / 64.0;
|
double blockError = static_cast<double>(diffSum) / 64.0;
|
||||||
if(blockError > 50.0) {
|
if(blockError > 50.0) {
|
||||||
|
@ -1761,7 +1776,10 @@ namespace BC7C {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
outBuf += 16;
|
outBuf += 16;
|
||||||
|
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
startX = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2755,7 +2773,7 @@ namespace BC7C {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert the image from a BC7 buffer to a RGBA8 buffer
|
// Convert the image from a BC7 buffer to a RGBA8 buffer
|
||||||
void Decompress(const DecompressionJob &dj) {
|
void Decompress(const FasTC::DecompressionJob &dj) {
|
||||||
|
|
||||||
const uint8 *inBuf = dj.InBuf();
|
const uint8 *inBuf = dj.InBuf();
|
||||||
uint32 *outBuf = reinterpret_cast<uint32 *>(dj.OutBuf());
|
uint32 *outBuf = reinterpret_cast<uint32 *>(dj.OutBuf());
|
||||||
|
|
103
Base/include/CompressionFormat.h
Normal file
103
Base/include/CompressionFormat.h
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
/* FasTC
|
||||||
|
* Copyright (c) 2013 University of North Carolina at Chapel Hill.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission to use, copy, modify, and distribute this software and its
|
||||||
|
* documentation for educational, research, and non-profit purposes, without
|
||||||
|
* fee, and without a written agreement is hereby granted, provided that the
|
||||||
|
* above copyright notice, this paragraph, and the following four paragraphs
|
||||||
|
* appear in all copies.
|
||||||
|
*
|
||||||
|
* Permission to incorporate this software into commercial products may be
|
||||||
|
* obtained by contacting the authors or the Office of Technology Development
|
||||||
|
* at the University of North Carolina at Chapel Hill <otd@unc.edu>.
|
||||||
|
*
|
||||||
|
* This software program and documentation are copyrighted by the University of
|
||||||
|
* North Carolina at Chapel Hill. The software program and documentation are
|
||||||
|
* supplied "as is," without any accompanying services from the University of
|
||||||
|
* North Carolina at Chapel Hill or the authors. The University of North
|
||||||
|
* Carolina at Chapel Hill and the authors do not warrant that the operation of
|
||||||
|
* the program will be uninterrupted or error-free. The end-user understands
|
||||||
|
* that the program was developed for research purposes and is advised not to
|
||||||
|
* rely exclusively on the program for any reason.
|
||||||
|
*
|
||||||
|
* IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
|
||||||
|
* AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
|
||||||
|
* OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
||||||
|
* THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
|
||||||
|
* AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*
|
||||||
|
* THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
|
||||||
|
* DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY
|
||||||
|
* STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
|
||||||
|
* AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
|
||||||
|
* THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
|
||||||
|
* ENHANCEMENTS, OR MODIFICATIONS.
|
||||||
|
*
|
||||||
|
* Please send all BUG REPORTS to <pavel@cs.unc.edu>.
|
||||||
|
*
|
||||||
|
* The authors may be contacted via:
|
||||||
|
*
|
||||||
|
* Pavel Krajcevski
|
||||||
|
* Dept of Computer Science
|
||||||
|
* 201 S Columbia St
|
||||||
|
* Frederick P. Brooks, Jr. Computer Science Bldg
|
||||||
|
* Chapel Hill, NC 27599-3175
|
||||||
|
* USA
|
||||||
|
*
|
||||||
|
* <http://gamma.cs.unc.edu/FasTC/>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _BASE_INCLUDE_COMPRESSIONFORMAT_H_
|
||||||
|
#define _BASE_INCLUDE_COMPRESSIONFORMAT_H_
|
||||||
|
|
||||||
|
#include "TexCompTypes.h"
|
||||||
|
|
||||||
|
namespace FasTC {
|
||||||
|
|
||||||
|
// The different supported compression formats
|
||||||
|
enum ECompressionFormat {
|
||||||
|
eCompressionFormat_DXT1,
|
||||||
|
eCompressionFormat_DXT5,
|
||||||
|
eCompressionFormat_ETC1,
|
||||||
|
eCompressionFormat_BPTC,
|
||||||
|
eCompressionFormat_PVRTC,
|
||||||
|
|
||||||
|
kNumCompressionFormats
|
||||||
|
};
|
||||||
|
|
||||||
|
// Returns the dimensions of the blocks for the given format.
|
||||||
|
inline static void GetBlockDimensions(ECompressionFormat fmt, uint32 (&outSz)[2]) {
|
||||||
|
switch(fmt) {
|
||||||
|
default:
|
||||||
|
case eCompressionFormat_DXT1:
|
||||||
|
case eCompressionFormat_DXT5:
|
||||||
|
case eCompressionFormat_BPTC:
|
||||||
|
case eCompressionFormat_PVRTC:
|
||||||
|
case eCompressionFormat_ETC1:
|
||||||
|
outSz[0] = 4;
|
||||||
|
outSz[1] = 4;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the size of the compressed block in bytes for the given format.
|
||||||
|
inline static uint32 GetBlockSize(ECompressionFormat fmt) {
|
||||||
|
switch(fmt) {
|
||||||
|
default:
|
||||||
|
case eCompressionFormat_DXT1:
|
||||||
|
case eCompressionFormat_PVRTC:
|
||||||
|
case eCompressionFormat_ETC1:
|
||||||
|
return 8;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case eCompressionFormat_DXT5:
|
||||||
|
case eCompressionFormat_BPTC:
|
||||||
|
return 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace FasTC
|
||||||
|
|
||||||
|
#endif // _BASE_INCLUDE_COMPRESSIONFORMAT_H_
|
|
@ -45,6 +45,7 @@
|
||||||
#define __COMPRESSION_JOBS_H__
|
#define __COMPRESSION_JOBS_H__
|
||||||
|
|
||||||
#include "TexCompTypes.h"
|
#include "TexCompTypes.h"
|
||||||
|
#include "CompressionFormat.h"
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# define ALIGN(x) __declspec( align(x) )
|
# define ALIGN(x) __declspec( align(x) )
|
||||||
|
@ -53,84 +54,149 @@
|
||||||
#endif
|
#endif
|
||||||
#define ALIGN_SSE ALIGN(16)
|
#define ALIGN_SSE ALIGN(16)
|
||||||
|
|
||||||
// This structure defines a compression job. Here, width and height are the dimensions
|
namespace FasTC {
|
||||||
// of the image in pixels. inBuf contains the R8G8B8A8 data that is to be compressed, and
|
|
||||||
// outBuf will contain the compressed BC7 data.
|
|
||||||
//
|
|
||||||
// Implicit sizes:
|
|
||||||
// inBuf - (width * height * 4) bytes
|
|
||||||
// outBuf - (width * height) bytes
|
|
||||||
struct CompressionJob {
|
|
||||||
private:
|
|
||||||
const uint8 *m_InBuf;
|
|
||||||
uint8 *m_OutBuf;
|
|
||||||
const uint32 m_Width;
|
|
||||||
const uint32 m_Height;
|
|
||||||
const uint32 m_RowBytes;
|
|
||||||
|
|
||||||
public:
|
// This structure defines a compression job. Here, width and height are the dimensions
|
||||||
const uint8 *InBuf() const { return m_InBuf; }
|
// of the image in pixels. inBuf contains the R8G8B8A8 data that is to be compressed, and
|
||||||
uint8 *OutBuf() const { return m_OutBuf; }
|
// outBuf will contain the compressed BC7 data.
|
||||||
uint32 Width() const { return m_Width; }
|
//
|
||||||
uint32 Height() const { return m_Height; }
|
// Implicit sizes:
|
||||||
uint32 RowBytes() const { return m_RowBytes; }
|
// inBuf - (width * height * 4) bytes
|
||||||
|
// outBuf - (width * height) bytes
|
||||||
|
class CompressionJob {
|
||||||
|
private:
|
||||||
|
ECompressionFormat m_Format;
|
||||||
|
const uint8 *m_InBuf;
|
||||||
|
uint8 *m_OutBuf;
|
||||||
|
uint32 m_Width;
|
||||||
|
uint32 m_Height;
|
||||||
|
uint32 m_XStart, m_XEnd;
|
||||||
|
uint32 m_YStart, m_YEnd;
|
||||||
|
|
||||||
CompressionJob(
|
public:
|
||||||
const uint8 *_inBuf,
|
ECompressionFormat Format() const { return m_Format; }
|
||||||
unsigned char *_outBuf,
|
const uint8 *InBuf() const { return m_InBuf; }
|
||||||
const uint32 _width,
|
uint8 *OutBuf() const { return m_OutBuf; }
|
||||||
const uint32 _height)
|
uint32 Width() const { return m_Width; }
|
||||||
: m_InBuf(_inBuf)
|
uint32 Height() const { return m_Height; }
|
||||||
, m_OutBuf(_outBuf)
|
uint32 XStart() const { return m_XStart; }
|
||||||
, m_Width(_width)
|
uint32 XEnd() const { return m_XEnd; }
|
||||||
, m_Height(_height)
|
uint32 YStart() const { return m_YStart; }
|
||||||
, m_RowBytes(_width)
|
uint32 YEnd() const { return m_YEnd; }
|
||||||
{ }
|
|
||||||
|
|
||||||
CompressionJob(
|
CompressionJob(
|
||||||
const uint8 *_inBuf,
|
ECompressionFormat _fmt,
|
||||||
unsigned char *_outBuf,
|
const uint8 *_inBuf,
|
||||||
const uint32 _width,
|
unsigned char *_outBuf,
|
||||||
const uint32 _height,
|
const uint32 _width,
|
||||||
const uint32 _rowbytes)
|
const uint32 _height)
|
||||||
: m_InBuf(_inBuf)
|
: m_Format(_fmt)
|
||||||
, m_OutBuf(_outBuf)
|
, m_InBuf(_inBuf)
|
||||||
, m_Width(_width)
|
, m_OutBuf(_outBuf)
|
||||||
, m_Height(_height)
|
, m_Width(_width)
|
||||||
, m_RowBytes(_rowbytes)
|
, m_Height(_height)
|
||||||
{ }
|
, m_XStart(0), m_XEnd(_width)
|
||||||
};
|
, m_YStart(0), m_YEnd(_height)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
CompressionJob(
|
||||||
|
ECompressionFormat _fmt,
|
||||||
|
const uint8 *_inBuf,
|
||||||
|
unsigned char *_outBuf,
|
||||||
|
const uint32 _width,
|
||||||
|
const uint32 _height,
|
||||||
|
const uint32 _xOffset,
|
||||||
|
const uint32 _yOffset)
|
||||||
|
: m_Format(_fmt)
|
||||||
|
, m_InBuf(_inBuf)
|
||||||
|
, m_OutBuf(_outBuf)
|
||||||
|
, m_Width(_width)
|
||||||
|
, m_Height(_height)
|
||||||
|
, m_XStart(_xOffset), m_XEnd(_width)
|
||||||
|
, m_YStart(_yOffset), m_YEnd(_height)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
CompressionJob(
|
||||||
|
ECompressionFormat _fmt,
|
||||||
|
const uint8 *_inBuf,
|
||||||
|
unsigned char *_outBuf,
|
||||||
|
const uint32 _width,
|
||||||
|
const uint32 _height,
|
||||||
|
const uint32 _xOffset,
|
||||||
|
const uint32 _yOffset,
|
||||||
|
const uint32 _xEndpoint,
|
||||||
|
const uint32 _yEndpoint)
|
||||||
|
: m_Format(_fmt)
|
||||||
|
, m_InBuf(_inBuf)
|
||||||
|
, m_OutBuf(_outBuf)
|
||||||
|
, m_Width(_width)
|
||||||
|
, m_Height(_height)
|
||||||
|
, m_XStart(_xOffset), m_XEnd(_xEndpoint)
|
||||||
|
, m_YStart(_yOffset), m_YEnd(_yEndpoint)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
// Returns the x and y coordinates of the pixel that corresponds to the block
|
||||||
|
// index for the given format.
|
||||||
|
void BlockIdxToCoords(uint32 blockIdx, uint32 (&out)[2]) const {
|
||||||
|
uint32 blockDim[2];
|
||||||
|
GetBlockDimensions(Format(), blockDim);
|
||||||
|
|
||||||
|
const uint32 kNumBlocksX = Width() / blockDim[0];
|
||||||
|
|
||||||
|
const uint32 blockX = blockIdx % kNumBlocksX;
|
||||||
|
const uint32 blockY = blockIdx / kNumBlocksX;
|
||||||
|
|
||||||
|
out[0] = blockX * blockDim[0];
|
||||||
|
out[1] = blockY * blockDim[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the index of the block that contains the pixel at the given x and y
|
||||||
|
// coordinates for the given format.
|
||||||
|
uint32 CoordsToBlockIdx(uint32 x, uint32 y) const {
|
||||||
|
uint32 blockDim[2];
|
||||||
|
GetBlockDimensions(Format(), blockDim);
|
||||||
|
|
||||||
|
const uint32 kNumBlocksX = Width() / blockDim[0];
|
||||||
|
|
||||||
|
const uint32 blockX = x / blockDim[0];
|
||||||
|
const uint32 blockY = y / blockDim[1];
|
||||||
|
|
||||||
|
return blockY * kNumBlocksX + blockX;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// This struct mirrors that for a compression job, but is used to decompress a BC7 stream. Here, inBuf
|
// This struct mirrors that for a compression job, but is used to decompress a BC7 stream. Here, inBuf
|
||||||
// is a buffer of BC7 data, and outBuf is the destination where we will copy the decompressed R8G8B8A8 data
|
// is a buffer of BC7 data, and outBuf is the destination where we will copy the decompressed R8G8B8A8 data
|
||||||
struct DecompressionJob {
|
class DecompressionJob {
|
||||||
private:
|
private:
|
||||||
const uint8 *m_InBuf;
|
const ECompressionFormat m_Format;
|
||||||
uint8 *m_OutBuf;
|
const uint8 *m_InBuf;
|
||||||
const uint32 m_Width;
|
uint8 *m_OutBuf;
|
||||||
const uint32 m_Height;
|
const uint32 m_Width;
|
||||||
|
const uint32 m_Height;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
const uint8 *InBuf() const { return m_InBuf; }
|
const uint8 *InBuf() const { return m_InBuf; }
|
||||||
uint8 *OutBuf() const { return m_OutBuf; }
|
uint8 *OutBuf() const { return m_OutBuf; }
|
||||||
uint32 Width() const { return m_Width; }
|
uint32 Width() const { return m_Width; }
|
||||||
uint32 Height() const { return m_Height; }
|
uint32 Height() const { return m_Height; }
|
||||||
|
uint32 Format() const { return m_Format; }
|
||||||
|
|
||||||
DecompressionJob(
|
DecompressionJob(
|
||||||
const uint8 *_inBuf,
|
ECompressionFormat _fmt,
|
||||||
unsigned char *_outBuf,
|
const uint8 *_inBuf, uint8 *_outBuf,
|
||||||
const uint32 _width,
|
uint32 _width, uint32 _height)
|
||||||
const uint32 _height)
|
: m_Format(_fmt)
|
||||||
: m_InBuf(_inBuf)
|
, m_InBuf(_inBuf)
|
||||||
, m_OutBuf(_outBuf)
|
, m_OutBuf(_outBuf)
|
||||||
, m_Width(_width)
|
, m_Width(_width)
|
||||||
, m_Height(_height)
|
, m_Height(_height)
|
||||||
{ }
|
{ }
|
||||||
};
|
};
|
||||||
|
|
||||||
// A structure for maintaining a list of textures to compress.
|
// A structure for maintaining a list of textures to compress.
|
||||||
struct CompressionJobList {
|
class CompressionJobList {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Initialize the list by specifying the total number of jobs that it will contain.
|
// Initialize the list by specifying the total number of jobs that it will contain.
|
||||||
// This constructor allocates the necessary memory to hold the array.
|
// This constructor allocates the necessary memory to hold the array.
|
||||||
|
@ -154,7 +220,7 @@ struct CompressionJobList {
|
||||||
const CompressionJob *GetJob(uint32 idx) const;
|
const CompressionJob *GetJob(uint32 idx) const;
|
||||||
uint32 *GetFinishedFlag(uint32 idx) const;
|
uint32 *GetFinishedFlag(uint32 idx) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CompressionJob *m_Jobs;
|
CompressionJob *m_Jobs;
|
||||||
uint32 m_NumJobs;
|
uint32 m_NumJobs;
|
||||||
const uint32 m_TotalNumJobs;
|
const uint32 m_TotalNumJobs;
|
||||||
|
@ -163,9 +229,10 @@ struct CompressionJobList {
|
||||||
ALIGN(32) uint32 m_flag;
|
ALIGN(32) uint32 m_flag;
|
||||||
} *m_FinishedFlags;
|
} *m_FinishedFlags;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ALIGN(32) uint32 m_CurrentJobIndex;
|
ALIGN(32) uint32 m_CurrentJobIndex;
|
||||||
ALIGN(32) uint32 m_CurrentBlockIndex;
|
ALIGN(32) uint32 m_CurrentBlockIndex;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace FasTC
|
||||||
#endif // __COMPRESSION_JOBS_H__
|
#endif // __COMPRESSION_JOBS_H__
|
||||||
|
|
|
@ -48,6 +48,8 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
|
namespace FasTC {
|
||||||
|
|
||||||
// Initialize the list by specifying the total number of jobs that it will contain.
|
// Initialize the list by specifying the total number of jobs that it will contain.
|
||||||
// This constructor allocates the necessary memory to hold the array.
|
// This constructor allocates the necessary memory to hold the array.
|
||||||
CompressionJobList::CompressionJobList(const uint32 nJobs)
|
CompressionJobList::CompressionJobList(const uint32 nJobs)
|
||||||
|
@ -128,3 +130,5 @@ uint32 *CompressionJobList::GetFinishedFlag(uint32 idx) const {
|
||||||
|
|
||||||
return &(m_FinishedFlags[idx].m_flag);
|
return &(m_FinishedFlags[idx].m_flag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace FasTC
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
void PrintUsage() {
|
void PrintUsage() {
|
||||||
fprintf(stderr, "Usage: tc [OPTIONS] imagefile\n");
|
fprintf(stderr, "Usage: tc [OPTIONS] imagefile\n");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "\t-v\t\tVerbose mode: prints out Entropy, Mean Local Entropy, and MSSIM");
|
fprintf(stderr, "\t-v\t\tVerbose mode: prints out Entropy, Mean Local Entropy, and MSSIM\n");
|
||||||
fprintf(stderr, "\t-f\t\tFormat to use. Either \"BPTC\", \"ETC1\", \"DXT1\", \"DXT5\", or \"PVRTC\". Default: BPTC\n");
|
fprintf(stderr, "\t-f\t\tFormat to use. Either \"BPTC\", \"ETC1\", \"DXT1\", \"DXT5\", or \"PVRTC\". Default: BPTC\n");
|
||||||
fprintf(stderr, "\t-l\t\tSave an output log.\n");
|
fprintf(stderr, "\t-l\t\tSave an output log.\n");
|
||||||
fprintf(stderr, "\t-q <quality>\tSet compression quality level. Default: 50\n");
|
fprintf(stderr, "\t-q <quality>\tSet compression quality level. Default: 50\n");
|
||||||
|
@ -104,7 +104,7 @@ int main(int argc, char **argv) {
|
||||||
bool bUseAtomics = false;
|
bool bUseAtomics = false;
|
||||||
bool bUsePVRTexLib = false;
|
bool bUsePVRTexLib = false;
|
||||||
bool bVerbose = false;
|
bool bVerbose = false;
|
||||||
ECompressionFormat format = eCompressionFormat_BPTC;
|
FasTC::ECompressionFormat format = FasTC::eCompressionFormat_BPTC;
|
||||||
|
|
||||||
bool knowArg = false;
|
bool knowArg = false;
|
||||||
do {
|
do {
|
||||||
|
@ -131,16 +131,16 @@ int main(int argc, char **argv) {
|
||||||
exit(1);
|
exit(1);
|
||||||
} else {
|
} else {
|
||||||
if(!strcmp(argv[fileArg], "PVRTC")) {
|
if(!strcmp(argv[fileArg], "PVRTC")) {
|
||||||
format = eCompressionFormat_PVRTC;
|
format = FasTC::eCompressionFormat_PVRTC;
|
||||||
} else if(!strcmp(argv[fileArg], "PVRTCLib")) {
|
} else if(!strcmp(argv[fileArg], "PVRTCLib")) {
|
||||||
format = eCompressionFormat_PVRTC;
|
format = FasTC::eCompressionFormat_PVRTC;
|
||||||
bUsePVRTexLib = true;
|
bUsePVRTexLib = true;
|
||||||
} else if(!strcmp(argv[fileArg], "ETC1")) {
|
} else if(!strcmp(argv[fileArg], "ETC1")) {
|
||||||
format = eCompressionFormat_ETC1;
|
format = FasTC::eCompressionFormat_ETC1;
|
||||||
} else if(!strcmp(argv[fileArg], "DXT1")) {
|
} else if(!strcmp(argv[fileArg], "DXT1")) {
|
||||||
format = eCompressionFormat_DXT1;
|
format = FasTC::eCompressionFormat_DXT1;
|
||||||
} else if(!strcmp(argv[fileArg], "DXT5")) {
|
} else if(!strcmp(argv[fileArg], "DXT5")) {
|
||||||
format = eCompressionFormat_DXT5;
|
format = FasTC::eCompressionFormat_DXT5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -286,13 +286,13 @@ int main(int argc, char **argv) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(format == eCompressionFormat_BPTC) {
|
if(format == FasTC::eCompressionFormat_BPTC) {
|
||||||
strcat(basename, "-bc7.png");
|
strcat(basename, "-bc7.png");
|
||||||
} else if(format == eCompressionFormat_PVRTC) {
|
} else if(format == FasTC::eCompressionFormat_PVRTC) {
|
||||||
strcat(basename, "-pvrtc.png");
|
strcat(basename, "-pvrtc.png");
|
||||||
} else if(format == eCompressionFormat_DXT1) {
|
} else if(format == FasTC::eCompressionFormat_DXT1) {
|
||||||
strcat(basename, "-dxt1.png");
|
strcat(basename, "-dxt1.png");
|
||||||
} else if(format == eCompressionFormat_ETC1) {
|
} else if(format == FasTC::eCompressionFormat_ETC1) {
|
||||||
strcat(basename, "-etc1.png");
|
strcat(basename, "-etc1.png");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,22 +45,12 @@
|
||||||
#define _COMPRESSED_IMAGE_H_
|
#define _COMPRESSED_IMAGE_H_
|
||||||
|
|
||||||
#include "TexCompTypes.h"
|
#include "TexCompTypes.h"
|
||||||
|
#include "CompressionFormat.h"
|
||||||
enum ECompressionFormat {
|
|
||||||
eCompressionFormat_DXT1,
|
|
||||||
eCompressionFormat_DXT5,
|
|
||||||
eCompressionFormat_ETC1,
|
|
||||||
eCompressionFormat_BPTC,
|
|
||||||
eCompressionFormat_PVRTC,
|
|
||||||
|
|
||||||
kNumCompressionFormats
|
|
||||||
};
|
|
||||||
|
|
||||||
#include "Image.h"
|
#include "Image.h"
|
||||||
|
|
||||||
class CompressedImage : public FasTC::Image<FasTC::Pixel> {
|
class CompressedImage : public FasTC::Image<FasTC::Pixel> {
|
||||||
private:
|
private:
|
||||||
ECompressionFormat m_Format;
|
FasTC::ECompressionFormat m_Format;
|
||||||
uint8 *m_CompressedData;
|
uint8 *m_CompressedData;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -73,7 +63,7 @@ class CompressedImage : public FasTC::Image<FasTC::Pixel> {
|
||||||
CompressedImage(
|
CompressedImage(
|
||||||
const uint32 width,
|
const uint32 width,
|
||||||
const uint32 height,
|
const uint32 height,
|
||||||
const ECompressionFormat format,
|
const FasTC::ECompressionFormat format,
|
||||||
const uint8 *data
|
const uint8 *data
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -85,8 +75,8 @@ class CompressedImage : public FasTC::Image<FasTC::Pixel> {
|
||||||
|
|
||||||
virtual void ComputePixels();
|
virtual void ComputePixels();
|
||||||
|
|
||||||
static uint32 GetCompressedSize(uint32 uncompressedSize, ECompressionFormat format);
|
static uint32 GetCompressedSize(uint32 uncompressedSize, FasTC::ECompressionFormat format);
|
||||||
static uint32 GetUncompressedSize(uint32 compressedSize, ECompressionFormat format) {
|
static uint32 GetUncompressedSize(uint32 compressedSize, FasTC::ECompressionFormat format) {
|
||||||
uint32 cmp = GetCompressedSize(compressedSize, format);
|
uint32 cmp = GetCompressedSize(compressedSize, format);
|
||||||
return compressedSize * (compressedSize / cmp);
|
return compressedSize * (compressedSize / cmp);
|
||||||
}
|
}
|
||||||
|
@ -104,7 +94,7 @@ class CompressedImage : public FasTC::Image<FasTC::Pixel> {
|
||||||
// size for a given compressed image.
|
// size for a given compressed image.
|
||||||
bool DecompressImage(uint8 *outBuf, uint32 outBufSz) const;
|
bool DecompressImage(uint8 *outBuf, uint32 outBufSz) const;
|
||||||
|
|
||||||
ECompressionFormat GetFormat() const { return m_Format; }
|
FasTC::ECompressionFormat GetFormat() const { return m_Format; }
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // _COMPRESSED_IMAGE_H_
|
#endif // _COMPRESSED_IMAGE_H_
|
||||||
|
|
|
@ -57,7 +57,7 @@ struct SCompressionSettings {
|
||||||
SCompressionSettings(); // defaults
|
SCompressionSettings(); // defaults
|
||||||
|
|
||||||
// The compression format for the image.
|
// The compression format for the image.
|
||||||
ECompressionFormat format;
|
FasTC::ECompressionFormat format;
|
||||||
|
|
||||||
// The flag that requests us to use SIMD, if it is available
|
// The flag that requests us to use SIMD, if it is available
|
||||||
bool bUseSIMD;
|
bool bUseSIMD;
|
||||||
|
|
|
@ -56,6 +56,10 @@
|
||||||
#include "DXTCompressor.h"
|
#include "DXTCompressor.h"
|
||||||
#include "ETCCompressor.h"
|
#include "ETCCompressor.h"
|
||||||
|
|
||||||
|
using FasTC::CompressionJob;
|
||||||
|
using FasTC::DecompressionJob;
|
||||||
|
using FasTC::ECompressionFormat;
|
||||||
|
|
||||||
CompressedImage::CompressedImage( const CompressedImage &other )
|
CompressedImage::CompressedImage( const CompressedImage &other )
|
||||||
: Image(other)
|
: Image(other)
|
||||||
, m_Format(other.m_Format)
|
, m_Format(other.m_Format)
|
||||||
|
@ -109,17 +113,17 @@ bool CompressedImage::DecompressImage(unsigned char *outBuf, unsigned int outBuf
|
||||||
assert(outBufSz == GetUncompressedSize());
|
assert(outBufSz == GetUncompressedSize());
|
||||||
|
|
||||||
uint8 *byteData = reinterpret_cast<uint8 *>(m_CompressedData);
|
uint8 *byteData = reinterpret_cast<uint8 *>(m_CompressedData);
|
||||||
DecompressionJob dj (byteData, outBuf, GetWidth(), GetHeight());
|
DecompressionJob dj (m_Format, byteData, outBuf, GetWidth(), GetHeight());
|
||||||
switch(m_Format) {
|
switch(m_Format) {
|
||||||
case eCompressionFormat_DXT1:
|
case FasTC::eCompressionFormat_DXT1:
|
||||||
DXTC::DecompressDXT1(dj);
|
DXTC::DecompressDXT1(dj);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case eCompressionFormat_ETC1:
|
case FasTC::eCompressionFormat_ETC1:
|
||||||
ETCC::Decompress(dj);
|
ETCC::Decompress(dj);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case eCompressionFormat_PVRTC:
|
case FasTC::eCompressionFormat_PVRTC:
|
||||||
{
|
{
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
PVRTCC::Decompress(dj, false, PVRTCC::eWrapMode_Wrap, true);
|
PVRTCC::Decompress(dj, false, PVRTCC::eWrapMode_Wrap, true);
|
||||||
|
@ -129,7 +133,7 @@ bool CompressedImage::DecompressImage(unsigned char *outBuf, unsigned int outBuf
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case eCompressionFormat_BPTC:
|
case FasTC::eCompressionFormat_BPTC:
|
||||||
{
|
{
|
||||||
BC7C::Decompress(dj);
|
BC7C::Decompress(dj);
|
||||||
}
|
}
|
||||||
|
@ -164,24 +168,15 @@ void CompressedImage::ComputePixels() {
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 CompressedImage::GetCompressedSize(uint32 uncompressedSize, ECompressionFormat format) {
|
uint32 CompressedImage::GetCompressedSize(uint32 uncompressedSize, ECompressionFormat format) {
|
||||||
assert(uncompressedSize % 8 == 0);
|
uint32 blockDim[2];
|
||||||
|
GetBlockDimensions(format, blockDim);
|
||||||
|
|
||||||
uint32 cmpDataSzNeeded = 0;
|
const uint32 uncompBlockSz = blockDim[0] * blockDim[1] * sizeof(uint32);
|
||||||
switch(format) {
|
const uint32 blockSz = GetBlockSize(format);
|
||||||
default:
|
|
||||||
assert(!"Not implemented!");
|
|
||||||
// Fall through V
|
|
||||||
case eCompressionFormat_ETC1:
|
|
||||||
case eCompressionFormat_DXT1:
|
|
||||||
case eCompressionFormat_PVRTC:
|
|
||||||
cmpDataSzNeeded = uncompressedSize / 8;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case eCompressionFormat_DXT5:
|
assert(uncompBlockSz % blockSz == 0);
|
||||||
case eCompressionFormat_BPTC:
|
const uint32 scale = uncompBlockSz / blockSz;
|
||||||
cmpDataSzNeeded = uncompressedSize / 4;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return cmpDataSzNeeded;
|
assert(uncompressedSize % blockSz == 0);
|
||||||
|
return uncompressedSize / scale;
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,12 +60,12 @@
|
||||||
// returns the compressed image data into outData. It is assumed that there is
|
// returns the compressed image data into outData. It is assumed that there is
|
||||||
// enough space allocated for outData to store the compressed data. Allocation
|
// enough space allocated for outData to store the compressed data. Allocation
|
||||||
// is dependent on the compression format.
|
// is dependent on the compression format.
|
||||||
typedef void (* CompressionFunc)(const CompressionJob &);
|
typedef void (* CompressionFunc)(const FasTC::CompressionJob &);
|
||||||
|
|
||||||
// A compression function format. It takes the raw data and image dimensions and
|
// A compression function format. It takes the raw data and image dimensions and
|
||||||
// returns the compressed image data into outData. It is assumed that there is
|
// returns the compressed image data into outData. It is assumed that there is
|
||||||
// enough space allocated for outData to store the compressed data. Allocation
|
// enough space allocated for outData to store the compressed data. Allocation
|
||||||
// is dependent on the compression format.
|
// is dependent on the compression format.
|
||||||
typedef void (* CompressionFuncWithStats)(const CompressionJob &, std::ostream *logStream);
|
typedef void (* CompressionFuncWithStats)(const FasTC::CompressionJob &, std::ostream *logStream);
|
||||||
|
|
||||||
#endif // CORE_SRC_COMPRESSIONFUNCS_H_
|
#endif // CORE_SRC_COMPRESSIONFUNCS_H_
|
||||||
|
|
|
@ -62,6 +62,10 @@
|
||||||
#include "ThreadGroup.h"
|
#include "ThreadGroup.h"
|
||||||
#include "WorkerQueue.h"
|
#include "WorkerQueue.h"
|
||||||
|
|
||||||
|
using FasTC::CompressionJob;
|
||||||
|
using FasTC::CompressionJobList;
|
||||||
|
using FasTC::ECompressionFormat;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void clamp(T &x, const T &minX, const T &maxX) {
|
static void clamp(T &x, const T &minX, const T &maxX) {
|
||||||
x = std::max(std::min(maxX, x), minX);
|
x = std::max(std::min(maxX, x), minX);
|
||||||
|
@ -86,7 +90,7 @@ static void CompressPVRTCLib(const CompressionJob &cj) {
|
||||||
}
|
}
|
||||||
|
|
||||||
SCompressionSettings:: SCompressionSettings()
|
SCompressionSettings:: SCompressionSettings()
|
||||||
: format(eCompressionFormat_BPTC)
|
: format(FasTC::eCompressionFormat_BPTC)
|
||||||
, bUseSIMD(false)
|
, bUseSIMD(false)
|
||||||
, iNumThreads(1)
|
, iNumThreads(1)
|
||||||
, iQuality(50)
|
, iQuality(50)
|
||||||
|
@ -98,16 +102,16 @@ SCompressionSettings:: SCompressionSettings()
|
||||||
static CompressionFuncWithStats ChooseFuncFromSettingsWithStats(const SCompressionSettings &s) {
|
static CompressionFuncWithStats ChooseFuncFromSettingsWithStats(const SCompressionSettings &s) {
|
||||||
switch(s.format) {
|
switch(s.format) {
|
||||||
|
|
||||||
case eCompressionFormat_BPTC:
|
case FasTC::eCompressionFormat_BPTC:
|
||||||
{
|
{
|
||||||
return BC7C::CompressWithStats;
|
return BC7C::CompressWithStats;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case eCompressionFormat_ETC1:
|
case FasTC::eCompressionFormat_ETC1:
|
||||||
case eCompressionFormat_DXT1:
|
case FasTC::eCompressionFormat_DXT1:
|
||||||
case eCompressionFormat_DXT5:
|
case FasTC::eCompressionFormat_DXT5:
|
||||||
case eCompressionFormat_PVRTC:
|
case FasTC::eCompressionFormat_PVRTC:
|
||||||
{
|
{
|
||||||
// !FIXME! actually implement one of these methods...
|
// !FIXME! actually implement one of these methods...
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -124,7 +128,7 @@ static CompressionFuncWithStats ChooseFuncFromSettingsWithStats(const SCompress
|
||||||
|
|
||||||
static CompressionFunc ChooseFuncFromSettings(const SCompressionSettings &s) {
|
static CompressionFunc ChooseFuncFromSettings(const SCompressionSettings &s) {
|
||||||
switch(s.format) {
|
switch(s.format) {
|
||||||
case eCompressionFormat_BPTC:
|
case FasTC::eCompressionFormat_BPTC:
|
||||||
{
|
{
|
||||||
BC7C::SetQualityLevel(s.iQuality);
|
BC7C::SetQualityLevel(s.iQuality);
|
||||||
#ifdef HAS_SSE_41
|
#ifdef HAS_SSE_41
|
||||||
|
@ -136,13 +140,13 @@ static CompressionFunc ChooseFuncFromSettings(const SCompressionSettings &s) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case eCompressionFormat_DXT1:
|
case FasTC::eCompressionFormat_DXT1:
|
||||||
return DXTC::CompressImageDXT1;
|
return DXTC::CompressImageDXT1;
|
||||||
|
|
||||||
case eCompressionFormat_DXT5:
|
case FasTC::eCompressionFormat_DXT5:
|
||||||
return DXTC::CompressImageDXT5;
|
return DXTC::CompressImageDXT5;
|
||||||
|
|
||||||
case eCompressionFormat_PVRTC:
|
case FasTC::eCompressionFormat_PVRTC:
|
||||||
{
|
{
|
||||||
if(s.bUsePVRTexLib) {
|
if(s.bUsePVRTexLib) {
|
||||||
return CompressPVRTCLib;
|
return CompressPVRTCLib;
|
||||||
|
@ -151,7 +155,7 @@ static CompressionFunc ChooseFuncFromSettings(const SCompressionSettings &s) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
case eCompressionFormat_ETC1:
|
case FasTC::eCompressionFormat_ETC1:
|
||||||
return ETCC::Compress_RG;
|
return ETCC::Compress_RG;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -168,11 +172,8 @@ static void ReportError(const char *msg) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static double CompressImageInSerial(
|
static double CompressImageInSerial(
|
||||||
const uint8 *imgData,
|
const CompressionJob &job,
|
||||||
const uint32 imgWidth,
|
const SCompressionSettings &settings
|
||||||
const uint32 imgHeight,
|
|
||||||
const SCompressionSettings &settings,
|
|
||||||
unsigned char *outBuf
|
|
||||||
) {
|
) {
|
||||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||||
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
|
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
|
||||||
|
@ -185,11 +186,10 @@ static double CompressImageInSerial(
|
||||||
stopWatch.Reset();
|
stopWatch.Reset();
|
||||||
stopWatch.Start();
|
stopWatch.Start();
|
||||||
|
|
||||||
CompressionJob cj (imgData, outBuf, imgWidth, imgHeight);
|
|
||||||
if(fStats && settings.logStream) {
|
if(fStats && settings.logStream) {
|
||||||
(*fStats)(cj, settings.logStream);
|
(*fStats)(job, settings.logStream);
|
||||||
} else {
|
} else {
|
||||||
(*f)(cj);
|
(*f)(job);
|
||||||
}
|
}
|
||||||
|
|
||||||
stopWatch.Stop();
|
stopWatch.Stop();
|
||||||
|
@ -231,10 +231,8 @@ class AtomicThreadUnit : public TCCallable {
|
||||||
};
|
};
|
||||||
|
|
||||||
static double CompressImageWithAtomics(
|
static double CompressImageWithAtomics(
|
||||||
const unsigned char *imgData,
|
const CompressionJob &cj,
|
||||||
const unsigned int width, const unsigned int height,
|
const SCompressionSettings &settings
|
||||||
const SCompressionSettings &settings,
|
|
||||||
unsigned char *outBuf
|
|
||||||
) {
|
) {
|
||||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||||
|
|
||||||
|
@ -242,7 +240,7 @@ static double CompressImageWithAtomics(
|
||||||
const int nTimes = settings.iNumCompressions;
|
const int nTimes = settings.iNumCompressions;
|
||||||
CompressionJobList cjl (nTimes);
|
CompressionJobList cjl (nTimes);
|
||||||
for(int i = 0; i < nTimes; i++) {
|
for(int i = 0; i < nTimes; i++) {
|
||||||
if(!cjl.AddJob(CompressionJob(imgData, outBuf, height, width))) {
|
if(!cjl.AddJob(cj)) {
|
||||||
assert(!"Error adding compression job to job list!");
|
assert(!"Error adding compression job to job list!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -287,10 +285,8 @@ static double CompressImageWithAtomics(
|
||||||
}
|
}
|
||||||
#else // HAS_ATOMICS
|
#else // HAS_ATOMICS
|
||||||
static double CompressImageWithAtomics(
|
static double CompressImageWithAtomics(
|
||||||
const unsigned char *imgData,
|
const CompressionJob &cj,
|
||||||
const unsigned int width, const unsigned int height,
|
const SCompressionSettings &settings
|
||||||
const SCompressionSettings &settings,
|
|
||||||
unsigned char *outBuf
|
|
||||||
) {
|
) {
|
||||||
fprintf(stderr, "Compiler does not support atomic operations!");
|
fprintf(stderr, "Compiler does not support atomic operations!");
|
||||||
}
|
}
|
||||||
|
@ -314,15 +310,13 @@ static double CompressThreadGroup(ThreadGroup &tgrp, const SCompressionSettings
|
||||||
cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds();
|
cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds();
|
||||||
}
|
}
|
||||||
|
|
||||||
tgrp.CleanUpThreads();
|
tgrp.CleanUpThreads();
|
||||||
return cmpTimeTotal;
|
return cmpTimeTotal;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double CompressImageWithThreads(
|
static double CompressImageWithThreads(
|
||||||
const unsigned char *imgData,
|
const CompressionJob &job,
|
||||||
const unsigned int imgDataSz,
|
const SCompressionSettings &settings
|
||||||
const SCompressionSettings &settings,
|
|
||||||
unsigned char *outBuf
|
|
||||||
) {
|
) {
|
||||||
|
|
||||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||||
|
@ -330,11 +324,11 @@ static double CompressImageWithThreads(
|
||||||
|
|
||||||
double cmpTimeTotal = 0.0;
|
double cmpTimeTotal = 0.0;
|
||||||
if(fStats && settings.logStream) {
|
if(fStats && settings.logStream) {
|
||||||
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, fStats, settings.logStream, outBuf);
|
ThreadGroup tgrp (settings.iNumThreads, job, fStats, settings.logStream);
|
||||||
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
|
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
|
ThreadGroup tgrp (settings.iNumThreads, job, f);
|
||||||
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
|
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -342,11 +336,14 @@ static double CompressImageWithThreads(
|
||||||
return cmpTime;
|
return cmpTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static double RunWorkerQueue(WorkerQueue &wq) {
|
||||||
|
wq.Run();
|
||||||
|
return wq.GetStopWatch().TimeInMilliseconds();
|
||||||
|
}
|
||||||
|
|
||||||
static double CompressImageWithWorkerQueue(
|
static double CompressImageWithWorkerQueue(
|
||||||
const unsigned char *imgData,
|
const CompressionJob &job,
|
||||||
const unsigned int imgDataSz,
|
const SCompressionSettings &settings
|
||||||
const SCompressionSettings &settings,
|
|
||||||
unsigned char *outBuf
|
|
||||||
) {
|
) {
|
||||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||||
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
|
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
|
||||||
|
@ -357,29 +354,21 @@ static double CompressImageWithWorkerQueue(
|
||||||
settings.iNumCompressions,
|
settings.iNumCompressions,
|
||||||
settings.iNumThreads,
|
settings.iNumThreads,
|
||||||
settings.iJobSize,
|
settings.iJobSize,
|
||||||
imgData,
|
job,
|
||||||
imgDataSz,
|
|
||||||
fStats,
|
fStats,
|
||||||
settings.logStream,
|
settings.logStream
|
||||||
outBuf
|
|
||||||
);
|
);
|
||||||
|
cmpTimeTotal = RunWorkerQueue(wq);
|
||||||
wq.Run();
|
|
||||||
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
WorkerQueue wq (
|
WorkerQueue wq (
|
||||||
settings.iNumCompressions,
|
settings.iNumCompressions,
|
||||||
settings.iNumThreads,
|
settings.iNumThreads,
|
||||||
settings.iJobSize,
|
settings.iJobSize,
|
||||||
imgData,
|
job,
|
||||||
imgDataSz,
|
f
|
||||||
f,
|
|
||||||
outBuf
|
|
||||||
);
|
);
|
||||||
|
cmpTimeTotal = RunWorkerQueue(wq);
|
||||||
wq.Run();
|
|
||||||
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return cmpTimeTotal / double(settings.iNumCompressions);
|
return cmpTimeTotal / double(settings.iNumCompressions);
|
||||||
|
@ -458,7 +447,7 @@ bool CompressImageData(
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 numThreads = settings.iNumThreads;
|
uint32 numThreads = settings.iNumThreads;
|
||||||
if(settings.format == eCompressionFormat_PVRTC &&
|
if(settings.format == FasTC::eCompressionFormat_PVRTC &&
|
||||||
(settings.iNumThreads > 1 || settings.logStream)) {
|
(settings.iNumThreads > 1 || settings.logStream)) {
|
||||||
if(settings.iNumThreads > 1) {
|
if(settings.iNumThreads > 1) {
|
||||||
ReportError("WARNING - PVRTC compressor does not support multithreading.");
|
ReportError("WARNING - PVRTC compressor does not support multithreading.");
|
||||||
|
@ -483,22 +472,22 @@ bool CompressImageData(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
if(ChooseFuncFromSettings(settings)) {
|
||||||
if(f) {
|
|
||||||
|
CompressionJob cj(settings.format, data, compressedData, width, height);
|
||||||
|
|
||||||
double cmpMSTime = 0.0;
|
double cmpMSTime = 0.0;
|
||||||
|
|
||||||
if(numThreads > 1) {
|
if(numThreads > 1) {
|
||||||
if(settings.bUseAtomics) {
|
if(settings.bUseAtomics) {
|
||||||
cmpMSTime = CompressImageWithAtomics(data, width, height, settings, compressedData);
|
cmpMSTime = CompressImageWithAtomics(cj, settings);
|
||||||
} else if(settings.iJobSize > 0) {
|
} else if(settings.iJobSize > 0) {
|
||||||
cmpMSTime = CompressImageWithWorkerQueue(data, dataSz, settings, compressedData);
|
cmpMSTime = CompressImageWithWorkerQueue(cj, settings);
|
||||||
} else {
|
} else {
|
||||||
cmpMSTime = CompressImageWithThreads(data, dataSz, settings, compressedData);
|
cmpMSTime = CompressImageWithThreads(cj, settings);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
cmpMSTime = CompressImageInSerial(data, width, height, settings, compressedData);
|
cmpMSTime = CompressImageInSerial(cj, settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Report compression time
|
// Report compression time
|
||||||
|
|
|
@ -49,23 +49,22 @@
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
using FasTC::CompressionJob;
|
||||||
|
|
||||||
CmpThread::CmpThread()
|
CmpThread::CmpThread()
|
||||||
: m_StartBarrier(NULL)
|
: m_ParentCounter(NULL)
|
||||||
, m_ParentCounter(NULL)
|
, m_StartBarrier(NULL)
|
||||||
, m_ParentCounterLock(NULL)
|
, m_ParentCounterLock(NULL)
|
||||||
, m_FinishCV(NULL)
|
, m_FinishCV(NULL)
|
||||||
, m_Width(0)
|
, m_ParentExitFlag(NULL)
|
||||||
, m_Height(0)
|
, m_Job(CompressionJob(FasTC::kNumCompressionFormats, NULL, NULL, 0, 0))
|
||||||
, m_CmpFunc(NULL)
|
, m_CmpFunc(NULL)
|
||||||
, m_CmpFuncWithStats(NULL)
|
, m_CmpFuncWithStats(NULL)
|
||||||
, m_LogStream(NULL)
|
, m_LogStream(NULL)
|
||||||
, m_OutBuf(NULL)
|
|
||||||
, m_InBuf(NULL)
|
|
||||||
, m_ParentExitFlag(NULL)
|
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
void CmpThread::operator()() {
|
void CmpThread::operator()() {
|
||||||
if(!m_OutBuf || !m_InBuf
|
if(!m_Job.OutBuf() || !m_Job.InBuf()
|
||||||
|| !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV
|
|| !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV
|
||||||
|| !m_StartBarrier
|
|| !m_StartBarrier
|
||||||
|| !m_ParentExitFlag
|
|| !m_ParentExitFlag
|
||||||
|
@ -87,11 +86,10 @@ void CmpThread::operator()() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
CompressionJob cj (m_InBuf, m_OutBuf, m_Width, m_Height);
|
|
||||||
if(m_CmpFunc)
|
if(m_CmpFunc)
|
||||||
(*m_CmpFunc)(cj);
|
(*m_CmpFunc)(m_Job);
|
||||||
else
|
else
|
||||||
(*m_CmpFuncWithStats)(cj, m_LogStream);
|
(*m_CmpFuncWithStats)(m_Job, m_LogStream);
|
||||||
|
|
||||||
{
|
{
|
||||||
TCLock lock(*m_ParentCounterLock);
|
TCLock lock(*m_ParentCounterLock);
|
||||||
|
@ -102,39 +100,19 @@ void CmpThread::operator()() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf )
|
ThreadGroup::ThreadGroup(uint32 numThreads,
|
||||||
|
const CompressionJob &job,
|
||||||
|
CompressionFunc func)
|
||||||
: m_StartBarrier(new TCBarrier(numThreads + 1))
|
: m_StartBarrier(new TCBarrier(numThreads + 1))
|
||||||
, m_FinishMutex(new TCMutex())
|
, m_FinishMutex(new TCMutex())
|
||||||
, m_FinishCV(new TCConditionVariable())
|
, m_FinishCV(new TCConditionVariable())
|
||||||
, m_NumThreads(numThreads)
|
, m_NumThreads(numThreads)
|
||||||
, m_ActiveThreads(0)
|
, m_ActiveThreads(0)
|
||||||
, m_ImageDataSz(inBufSz)
|
, m_Job(job)
|
||||||
, m_ImageData(inBuf)
|
|
||||||
, m_OutBuf(outBuf)
|
|
||||||
, m_ThreadState(eThreadState_Done)
|
, m_ThreadState(eThreadState_Done)
|
||||||
, m_ExitFlag(false)
|
, m_ExitFlag(false)
|
||||||
, m_CompressedBlockSize(
|
|
||||||
(func == BC7C::Compress
|
|
||||||
#ifdef HAS_SSE_41
|
|
||||||
|| func == BC7C::CompressImageBC7SIMD
|
|
||||||
#endif
|
|
||||||
)?
|
|
||||||
16
|
|
||||||
:
|
|
||||||
0
|
|
||||||
)
|
|
||||||
, m_UncompressedBlockSize(
|
|
||||||
(func == BC7C::Compress
|
|
||||||
#ifdef HAS_SSE_41
|
|
||||||
|| func == BC7C::CompressImageBC7SIMD
|
|
||||||
#endif
|
|
||||||
)?
|
|
||||||
64
|
|
||||||
:
|
|
||||||
0
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
for(int i = 0; i < kMaxNumThreads; i++) {
|
for(uint32 i = 0; i < kMaxNumThreads; i++) {
|
||||||
// Thread synchronization primitives
|
// Thread synchronization primitives
|
||||||
m_Threads[i].m_ParentCounterLock = m_FinishMutex;
|
m_Threads[i].m_ParentCounterLock = m_FinishMutex;
|
||||||
m_Threads[i].m_FinishCV = m_FinishCV;
|
m_Threads[i].m_FinishCV = m_FinishCV;
|
||||||
|
@ -146,37 +124,21 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
|
||||||
}
|
}
|
||||||
|
|
||||||
ThreadGroup::ThreadGroup(
|
ThreadGroup::ThreadGroup(
|
||||||
int numThreads,
|
uint32 numThreads,
|
||||||
const unsigned char *inBuf,
|
const CompressionJob &job,
|
||||||
unsigned int inBufSz,
|
|
||||||
CompressionFuncWithStats func,
|
CompressionFuncWithStats func,
|
||||||
std::ostream *logStream,
|
std::ostream *logStream
|
||||||
unsigned char *outBuf
|
|
||||||
)
|
)
|
||||||
: m_StartBarrier(new TCBarrier(numThreads + 1))
|
: m_StartBarrier(new TCBarrier(numThreads + 1))
|
||||||
, m_FinishMutex(new TCMutex())
|
, m_FinishMutex(new TCMutex())
|
||||||
, m_FinishCV(new TCConditionVariable())
|
, m_FinishCV(new TCConditionVariable())
|
||||||
, m_NumThreads(numThreads)
|
, m_NumThreads(numThreads)
|
||||||
, m_ActiveThreads(0)
|
, m_ActiveThreads(0)
|
||||||
, m_ImageDataSz(inBufSz)
|
, m_Job(job)
|
||||||
, m_ImageData(inBuf)
|
|
||||||
, m_OutBuf(outBuf)
|
|
||||||
, m_ThreadState(eThreadState_Done)
|
, m_ThreadState(eThreadState_Done)
|
||||||
, m_ExitFlag(false)
|
, m_ExitFlag(false)
|
||||||
, m_CompressedBlockSize(
|
|
||||||
(func == BC7C::CompressWithStats)?
|
|
||||||
16
|
|
||||||
:
|
|
||||||
0
|
|
||||||
)
|
|
||||||
, m_UncompressedBlockSize(
|
|
||||||
(func == BC7C::CompressWithStats)?
|
|
||||||
64
|
|
||||||
:
|
|
||||||
0
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
for(int i = 0; i < kMaxNumThreads; i++) {
|
for(uint32 i = 0; i < kMaxNumThreads; i++) {
|
||||||
// Thread synchronization primitives
|
// Thread synchronization primitives
|
||||||
m_Threads[i].m_ParentCounterLock = m_FinishMutex;
|
m_Threads[i].m_ParentCounterLock = m_FinishMutex;
|
||||||
m_Threads[i].m_FinishCV = m_FinishCV;
|
m_Threads[i].m_FinishCV = m_FinishCV;
|
||||||
|
@ -209,10 +171,11 @@ bool ThreadGroup::PrepareThreads() {
|
||||||
|
|
||||||
// We can assume that the image data is in block stream order
|
// We can assume that the image data is in block stream order
|
||||||
// so, the size of the data given to each thread will be (nb*4)x4
|
// so, the size of the data given to each thread will be (nb*4)x4
|
||||||
int numBlocks = m_ImageDataSz / 64;
|
uint32 blockDim[2];
|
||||||
|
GetBlockDimensions(m_Job.Format(), blockDim);
|
||||||
int blocksProcessed = 0;
|
uint32 numBlocks = (m_Job.Width() * m_Job.Height()) / (blockDim[0] * blockDim[1]);
|
||||||
int blocksPerThread = (numBlocks/m_NumThreads) + ((numBlocks % m_NumThreads)? 1 : 0);
|
uint32 blocksProcessed = 0;
|
||||||
|
uint32 blocksPerThread = (numBlocks/m_NumThreads) + ((numBlocks % m_NumThreads)? 1 : 0);
|
||||||
|
|
||||||
// Currently no threads are finished...
|
// Currently no threads are finished...
|
||||||
m_ThreadsFinished = 0;
|
m_ThreadsFinished = 0;
|
||||||
|
@ -226,11 +189,22 @@ bool ThreadGroup::PrepareThreads() {
|
||||||
numBlocksThisThread = numBlocks - blocksProcessed;
|
numBlocksThisThread = numBlocks - blocksProcessed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32 start[2], end[2];
|
||||||
|
m_Job.BlockIdxToCoords(blocksProcessed, start);
|
||||||
|
m_Job.BlockIdxToCoords(blocksProcessed + numBlocksThisThread, end);
|
||||||
|
|
||||||
|
// !TODO! This should be moved to a unit test...
|
||||||
|
assert(m_Job.CoordsToBlockIdx(start[0], start[1]) == blocksProcessed);
|
||||||
|
assert(m_Job.CoordsToBlockIdx(end[0], end[1]) == blocksProcessed + numBlocksThisThread);
|
||||||
|
|
||||||
|
CompressionJob cj(m_Job.Format(),
|
||||||
|
m_Job.InBuf(), m_Job.OutBuf(),
|
||||||
|
m_Job.Width(), m_Job.Height(),
|
||||||
|
start[0], start[1],
|
||||||
|
end[0], end[1]);
|
||||||
|
|
||||||
CmpThread &t = m_Threads[m_ActiveThreads];
|
CmpThread &t = m_Threads[m_ActiveThreads];
|
||||||
t.m_Height = 4;
|
t.m_Job = cj;
|
||||||
t.m_Width = numBlocksThisThread * 4;
|
|
||||||
t.m_OutBuf = m_OutBuf + (blocksProcessed * m_CompressedBlockSize);
|
|
||||||
t.m_InBuf = m_ImageData + (blocksProcessed * m_UncompressedBlockSize);
|
|
||||||
|
|
||||||
blocksProcessed += numBlocksThisThread;
|
blocksProcessed += numBlocksThisThread;
|
||||||
|
|
||||||
|
@ -280,7 +254,7 @@ bool ThreadGroup::CleanUpThreads() {
|
||||||
m_StartBarrier->Wait();
|
m_StartBarrier->Wait();
|
||||||
|
|
||||||
// Clean up.
|
// Clean up.
|
||||||
for(int i = 0; i < m_ActiveThreads; i++) {
|
for(uint32 i = 0; i < m_ActiveThreads; i++) {
|
||||||
m_ThreadHandles[i]->Join();
|
m_ThreadHandles[i]->Join();
|
||||||
delete m_ThreadHandles[i];
|
delete m_ThreadHandles[i];
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,26 +54,19 @@ struct CmpThread : public TCCallable {
|
||||||
friend class ThreadGroup;
|
friend class ThreadGroup;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TCBarrier *m_StartBarrier;
|
uint32 *m_ParentCounter;
|
||||||
|
|
||||||
int *m_ParentCounter;
|
|
||||||
|
|
||||||
|
TCBarrier *m_StartBarrier;
|
||||||
TCMutex *m_ParentCounterLock;
|
TCMutex *m_ParentCounterLock;
|
||||||
TCConditionVariable *m_FinishCV;
|
TCConditionVariable *m_FinishCV;
|
||||||
|
|
||||||
int m_Width;
|
bool *m_ParentExitFlag;
|
||||||
int m_Height;
|
|
||||||
|
|
||||||
|
FasTC::CompressionJob m_Job;
|
||||||
CompressionFunc m_CmpFunc;
|
CompressionFunc m_CmpFunc;
|
||||||
|
|
||||||
CompressionFuncWithStats m_CmpFuncWithStats;
|
CompressionFuncWithStats m_CmpFuncWithStats;
|
||||||
std::ostream *m_LogStream;
|
std::ostream *m_LogStream;
|
||||||
|
|
||||||
unsigned char *m_OutBuf;
|
|
||||||
const unsigned char *m_InBuf;
|
|
||||||
|
|
||||||
bool *m_ParentExitFlag;
|
|
||||||
|
|
||||||
CmpThread();
|
CmpThread();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -83,21 +76,17 @@ public:
|
||||||
|
|
||||||
class ThreadGroup {
|
class ThreadGroup {
|
||||||
public:
|
public:
|
||||||
ThreadGroup(
|
ThreadGroup(
|
||||||
int numThreads,
|
uint32 numThreads,
|
||||||
const unsigned char *inBuf,
|
const FasTC::CompressionJob &cj,
|
||||||
unsigned int inBufSz,
|
CompressionFunc func
|
||||||
CompressionFunc func,
|
|
||||||
unsigned char *outBuf
|
|
||||||
);
|
);
|
||||||
|
|
||||||
ThreadGroup(
|
ThreadGroup(
|
||||||
int numThreads,
|
uint32 numThreads,
|
||||||
const unsigned char *inBuf,
|
const FasTC::CompressionJob &cj,
|
||||||
unsigned int inBufSz,
|
CompressionFuncWithStats func,
|
||||||
CompressionFuncWithStats func,
|
std::ostream *logStream
|
||||||
std::ostream *logStream,
|
|
||||||
unsigned char *outBuf
|
|
||||||
);
|
);
|
||||||
|
|
||||||
~ThreadGroup();
|
~ThreadGroup();
|
||||||
|
@ -121,19 +110,16 @@ class ThreadGroup {
|
||||||
TCMutex *const m_FinishMutex;
|
TCMutex *const m_FinishMutex;
|
||||||
TCConditionVariable *const m_FinishCV;
|
TCConditionVariable *const m_FinishCV;
|
||||||
|
|
||||||
static const int kMaxNumThreads = 256;
|
static const uint32 kMaxNumThreads = 256;
|
||||||
const int m_NumThreads;
|
const int m_NumThreads;
|
||||||
|
|
||||||
int m_ActiveThreads;
|
uint32 m_ActiveThreads;
|
||||||
int m_ThreadsFinished;
|
uint32 m_ThreadsFinished;
|
||||||
|
|
||||||
CmpThread m_Threads[kMaxNumThreads];
|
CmpThread m_Threads[kMaxNumThreads];
|
||||||
TCThread *m_ThreadHandles[kMaxNumThreads];
|
TCThread *m_ThreadHandles[kMaxNumThreads];
|
||||||
|
|
||||||
// State variables.
|
FasTC::CompressionJob m_Job;
|
||||||
const unsigned int m_ImageDataSz;
|
|
||||||
const unsigned char *const m_ImageData;
|
|
||||||
unsigned char *m_OutBuf;
|
|
||||||
|
|
||||||
StopWatch m_StopWatch;
|
StopWatch m_StopWatch;
|
||||||
|
|
||||||
|
@ -141,9 +127,6 @@ class ThreadGroup {
|
||||||
bool m_ExitFlag;
|
bool m_ExitFlag;
|
||||||
|
|
||||||
std::ostream *m_LogStream;
|
std::ostream *m_LogStream;
|
||||||
|
|
||||||
const unsigned int m_CompressedBlockSize;
|
|
||||||
const unsigned int m_UncompressedBlockSize;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // _THREAD_GROUP_H_
|
#endif // _THREAD_GROUP_H_
|
||||||
|
|
|
@ -51,6 +51,8 @@
|
||||||
|
|
||||||
#include "BC7Compressor.h"
|
#include "BC7Compressor.h"
|
||||||
|
|
||||||
|
using FasTC::CompressionJob;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void clamp(T &x, const T &min, const T &max) {
|
static inline void clamp(T &x, const T &min, const T &max) {
|
||||||
if(x < min) x = min;
|
if(x < min) x = min;
|
||||||
|
@ -98,10 +100,19 @@ void WorkerThread::operator()() {
|
||||||
|
|
||||||
case eAction_DoWork:
|
case eAction_DoWork:
|
||||||
{
|
{
|
||||||
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
|
const CompressionJob &job = m_Parent->GetCompressionJob();
|
||||||
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
|
|
||||||
|
|
||||||
CompressionJob cj (src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
|
uint32 start[2];
|
||||||
|
m_Parent->GetStartForThread(m_ThreadIdx, start);
|
||||||
|
|
||||||
|
uint32 end[2];
|
||||||
|
m_Parent->GetEndForThread(m_ThreadIdx, end);
|
||||||
|
|
||||||
|
CompressionJob cj (job.Format(),
|
||||||
|
job.InBuf(), job.OutBuf(),
|
||||||
|
job.Width(), job.Height(),
|
||||||
|
start[0], start[1],
|
||||||
|
end[0], end[1]);
|
||||||
if(f)
|
if(f)
|
||||||
(*f)(cj);
|
(*f)(cj);
|
||||||
else
|
else
|
||||||
|
@ -128,10 +139,8 @@ WorkerQueue::WorkerQueue(
|
||||||
uint32 numCompressions,
|
uint32 numCompressions,
|
||||||
uint32 numThreads,
|
uint32 numThreads,
|
||||||
uint32 jobSize,
|
uint32 jobSize,
|
||||||
const uint8 *inBuf,
|
const CompressionJob &job,
|
||||||
uint32 inBufSz,
|
CompressionFunc func
|
||||||
CompressionFunc func,
|
|
||||||
uint8 *outBuf
|
|
||||||
)
|
)
|
||||||
: m_NumCompressions(0)
|
: m_NumCompressions(0)
|
||||||
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
|
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
|
||||||
|
@ -139,32 +148,22 @@ WorkerQueue::WorkerQueue(
|
||||||
, m_WaitingThreads(0)
|
, m_WaitingThreads(0)
|
||||||
, m_ActiveThreads(0)
|
, m_ActiveThreads(0)
|
||||||
, m_JobSize(std::max(uint32(1), jobSize))
|
, m_JobSize(std::max(uint32(1), jobSize))
|
||||||
, m_InBufSz(inBufSz)
|
, m_Job(job)
|
||||||
, m_InBuf(inBuf)
|
|
||||||
, m_OutBuf(outBuf)
|
|
||||||
, m_NextBlock(0)
|
, m_NextBlock(0)
|
||||||
, m_CompressionFunc(func)
|
, m_CompressionFunc(func)
|
||||||
, m_CompressionFuncWithStats(NULL)
|
, m_CompressionFuncWithStats(NULL)
|
||||||
, m_LogStream(NULL)
|
, m_LogStream(NULL)
|
||||||
{
|
{
|
||||||
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
|
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
|
||||||
|
|
||||||
#ifndef NDEBUG
|
|
||||||
if(m_InBufSz % 64) {
|
|
||||||
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
WorkerQueue::WorkerQueue(
|
WorkerQueue::WorkerQueue(
|
||||||
uint32 numCompressions,
|
uint32 numCompressions,
|
||||||
uint32 numThreads,
|
uint32 numThreads,
|
||||||
uint32 jobSize,
|
uint32 jobSize,
|
||||||
const uint8 *inBuf,
|
const CompressionJob &job,
|
||||||
uint32 inBufSz,
|
|
||||||
CompressionFuncWithStats func,
|
CompressionFuncWithStats func,
|
||||||
std::ostream *logStream,
|
std::ostream *logStream
|
||||||
uint8 *outBuf
|
|
||||||
)
|
)
|
||||||
: m_NumCompressions(0)
|
: m_NumCompressions(0)
|
||||||
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
|
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
|
||||||
|
@ -172,21 +171,13 @@ WorkerQueue::WorkerQueue(
|
||||||
, m_WaitingThreads(0)
|
, m_WaitingThreads(0)
|
||||||
, m_ActiveThreads(0)
|
, m_ActiveThreads(0)
|
||||||
, m_JobSize(std::max(uint32(1), jobSize))
|
, m_JobSize(std::max(uint32(1), jobSize))
|
||||||
, m_InBufSz(inBufSz)
|
, m_Job(job)
|
||||||
, m_InBuf(inBuf)
|
|
||||||
, m_OutBuf(outBuf)
|
|
||||||
, m_NextBlock(0)
|
, m_NextBlock(0)
|
||||||
, m_CompressionFunc(NULL)
|
, m_CompressionFunc(NULL)
|
||||||
, m_CompressionFuncWithStats(func)
|
, m_CompressionFuncWithStats(func)
|
||||||
, m_LogStream(logStream)
|
, m_LogStream(logStream)
|
||||||
{
|
{
|
||||||
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
|
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
|
||||||
|
|
||||||
#ifndef NDEBUG
|
|
||||||
if(m_InBufSz % 64) {
|
|
||||||
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void WorkerQueue::Run() {
|
void WorkerQueue::Run() {
|
||||||
|
@ -234,7 +225,9 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// How many blocks total do we have?
|
// How many blocks total do we have?
|
||||||
const uint32 totalBlocks = m_InBufSz / 64;
|
uint32 blockDim[2];
|
||||||
|
GetBlockDimensions(m_Job.Format(), blockDim);
|
||||||
|
const uint32 totalBlocks = (m_Job.Width() * m_Job.Height()) / (blockDim[0] * blockDim[1]);
|
||||||
|
|
||||||
// Make sure we have exclusive access...
|
// Make sure we have exclusive access...
|
||||||
TCLock lock(m_Mutex);
|
TCLock lock(m_Mutex);
|
||||||
|
@ -273,28 +266,21 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
|
||||||
return WorkerThread::eAction_DoWork;
|
return WorkerThread::eAction_DoWork;
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint8 *WorkerQueue::GetSrcForThread(const int threadIdx) const {
|
void WorkerQueue::GetStartForThread(const uint32 threadIdx, uint32 (&start)[2]) {
|
||||||
assert(m_Offsets[threadIdx] >= 0);
|
|
||||||
assert(threadIdx >= 0);
|
assert(threadIdx >= 0);
|
||||||
assert(threadIdx < int(m_NumThreads));
|
assert(threadIdx < int(m_NumThreads));
|
||||||
|
assert(m_Offsets[threadIdx] >= 0);
|
||||||
|
|
||||||
const uint32 inBufBlockSz = 16 * 4;
|
const uint32 blockIdx = m_Offsets[threadIdx];
|
||||||
return m_InBuf + m_Offsets[threadIdx] * inBufBlockSz;
|
m_Job.BlockIdxToCoords(blockIdx, start);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8 *WorkerQueue::GetDstForThread(const int threadIdx) const {
|
void WorkerQueue::GetEndForThread(const uint32 threadIdx, uint32 (&end)[2]) {
|
||||||
assert(m_Offsets[threadIdx] >= 0);
|
|
||||||
assert(threadIdx >= 0);
|
assert(threadIdx >= 0);
|
||||||
assert(threadIdx < int(m_NumThreads));
|
assert(threadIdx < int(m_NumThreads));
|
||||||
|
|
||||||
const uint32 outBufBlockSz = 16;
|
|
||||||
return m_OutBuf + m_Offsets[threadIdx] * outBufBlockSz;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32 WorkerQueue::GetNumBlocksForThread(const int threadIdx) const {
|
|
||||||
assert(m_Offsets[threadIdx] >= 0);
|
assert(m_Offsets[threadIdx] >= 0);
|
||||||
assert(threadIdx >= 0);
|
assert(m_NumBlocks[threadIdx] >= 0);
|
||||||
assert(threadIdx < int(m_NumThreads));
|
|
||||||
|
|
||||||
return m_NumBlocks[threadIdx];
|
const uint32 blockIdx = m_Offsets[threadIdx] + m_NumBlocks[threadIdx];
|
||||||
|
m_Job.BlockIdxToCoords(blockIdx, end);
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,23 +81,19 @@ class WorkerQueue {
|
||||||
public:
|
public:
|
||||||
WorkerQueue(
|
WorkerQueue(
|
||||||
uint32 numCompressions,
|
uint32 numCompressions,
|
||||||
uint32 numThreads,
|
uint32 numThreads,
|
||||||
uint32 jobSize,
|
uint32 jobSize,
|
||||||
const uint8 *inBuf,
|
const FasTC::CompressionJob &job,
|
||||||
uint32 inBufSz,
|
CompressionFunc func
|
||||||
CompressionFunc func,
|
|
||||||
uint8 *outBuf
|
|
||||||
);
|
);
|
||||||
|
|
||||||
WorkerQueue(
|
WorkerQueue(
|
||||||
uint32 numCompressions,
|
uint32 numCompressions,
|
||||||
uint32 numThreads,
|
uint32 numThreads,
|
||||||
uint32 jobSize,
|
uint32 jobSize,
|
||||||
const uint8 *inBuf,
|
const FasTC::CompressionJob &job,
|
||||||
uint32 inBufSz,
|
|
||||||
CompressionFuncWithStats func,
|
CompressionFuncWithStats func,
|
||||||
std::ostream *logStream,
|
std::ostream *logStream
|
||||||
uint8 *outBuf
|
|
||||||
);
|
);
|
||||||
|
|
||||||
~WorkerQueue() { }
|
~WorkerQueue() { }
|
||||||
|
@ -113,9 +109,7 @@ class WorkerQueue {
|
||||||
uint32 m_WaitingThreads;
|
uint32 m_WaitingThreads;
|
||||||
uint32 m_ActiveThreads;
|
uint32 m_ActiveThreads;
|
||||||
uint32 m_JobSize;
|
uint32 m_JobSize;
|
||||||
uint32 m_InBufSz;
|
FasTC::CompressionJob m_Job;
|
||||||
const uint8 *m_InBuf;
|
|
||||||
uint8 *m_OutBuf;
|
|
||||||
|
|
||||||
TCConditionVariable m_CV;
|
TCConditionVariable m_CV;
|
||||||
TCMutex m_Mutex;
|
TCMutex m_Mutex;
|
||||||
|
@ -129,9 +123,9 @@ class WorkerQueue {
|
||||||
WorkerThread *m_Workers[kMaxNumWorkerThreads];
|
WorkerThread *m_Workers[kMaxNumWorkerThreads];
|
||||||
TCThread *m_ThreadHandles[kMaxNumWorkerThreads];
|
TCThread *m_ThreadHandles[kMaxNumWorkerThreads];
|
||||||
|
|
||||||
const uint8 *GetSrcForThread(const int threadIdx) const;
|
const FasTC::CompressionJob &GetCompressionJob() const { return m_Job; }
|
||||||
uint8 *GetDstForThread(const int threadIdx) const;
|
void GetStartForThread(const uint32 threadIdx, uint32 (&start)[2]);
|
||||||
uint32 GetNumBlocksForThread(const int threadIdx) const;
|
void GetEndForThread(const uint32 threadIdx, uint32 (&end)[2]);
|
||||||
|
|
||||||
const CompressionFunc m_CompressionFunc;
|
const CompressionFunc m_CompressionFunc;
|
||||||
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
|
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
|
||||||
|
|
|
@ -16,10 +16,10 @@
|
||||||
namespace DXTC
|
namespace DXTC
|
||||||
{
|
{
|
||||||
// DXT compressor (scalar version).
|
// DXT compressor (scalar version).
|
||||||
void CompressImageDXT1(const CompressionJob &);
|
void CompressImageDXT1(const FasTC::CompressionJob &);
|
||||||
void CompressImageDXT5(const CompressionJob &);
|
void CompressImageDXT5(const FasTC::CompressionJob &);
|
||||||
|
|
||||||
void DecompressDXT1(const DecompressionJob &);
|
void DecompressDXT1(const FasTC::DecompressionJob &);
|
||||||
|
|
||||||
uint16 ColorTo565(const uint8* color);
|
uint16 ColorTo565(const uint8* color);
|
||||||
void EmitByte(uint8*& dest, uint8 b);
|
void EmitByte(uint8*& dest, uint8 b);
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
namespace DXTC
|
namespace DXTC
|
||||||
{
|
{
|
||||||
// Function prototypes
|
// Function prototypes
|
||||||
void ExtractBlock(const uint8* inPtr, uint32 width, uint8* colorBlock);
|
void ExtractBlock(const uint32* inPtr, uint32 width, uint8* colorBlock);
|
||||||
void GetMinMaxColors(const uint8* colorBlock, uint8* minColor, uint8* maxColor);
|
void GetMinMaxColors(const uint8* colorBlock, uint8* minColor, uint8* maxColor);
|
||||||
void GetMinMaxColorsWithAlpha(const uint8* colorBlock, uint8* minColor, uint8* maxColor);
|
void GetMinMaxColorsWithAlpha(const uint8* colorBlock, uint8* minColor, uint8* maxColor);
|
||||||
void EmitColorIndices(const uint8* colorBlock, uint8*& outBuf, const uint8* minColor, const uint8* maxColor);
|
void EmitColorIndices(const uint8* colorBlock, uint8*& outBuf, const uint8* minColor, const uint8* maxColor);
|
||||||
|
@ -35,23 +35,30 @@ namespace DXTC
|
||||||
// 4-byte RGBA format. The width and height parameters specify the size of the image in pixels.
|
// 4-byte RGBA format. The width and height parameters specify the size of the image in pixels.
|
||||||
// The buffer pointed to by outBuf should be large enough to store the compressed image. This
|
// The buffer pointed to by outBuf should be large enough to store the compressed image. This
|
||||||
// implementation has an 8:1 compression ratio.
|
// implementation has an 8:1 compression ratio.
|
||||||
void CompressImageDXT1(const CompressionJob &cj) {
|
void CompressImageDXT1(const FasTC::CompressionJob &cj) {
|
||||||
uint8 block[64];
|
uint8 block[64];
|
||||||
uint8 minColor[4];
|
uint8 minColor[4];
|
||||||
uint8 maxColor[4];
|
uint8 maxColor[4];
|
||||||
|
|
||||||
uint8 *outBuf = cj.OutBuf();
|
const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_DXT1);
|
||||||
const uint8 *inBuf = cj.InBuf();
|
const uint32 startBlock = cj.CoordsToBlockIdx(cj.XStart(), cj.YStart());
|
||||||
for(int j = 0; j < cj.Height(); j += 4, inBuf += cj.Width() * 4 * 4)
|
uint8 *outBuf = cj.OutBuf() + startBlock * kBlockSz;
|
||||||
{
|
|
||||||
for(int i = 0; i < cj.Width(); i += 4)
|
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
|
||||||
{
|
uint32 startX = cj.XStart();
|
||||||
ExtractBlock(inBuf + i * 4, cj.Width(), block);
|
bool done = false;
|
||||||
|
for(uint32 j = cj.YStart(); !done; j += 4) {
|
||||||
|
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
|
||||||
|
|
||||||
|
const uint32 kOffset = j*cj.Width() + i;
|
||||||
|
ExtractBlock(inPixels + kOffset, cj.Width(), block);
|
||||||
GetMinMaxColors(block, minColor, maxColor);
|
GetMinMaxColors(block, minColor, maxColor);
|
||||||
EmitWord(outBuf, ColorTo565(maxColor));
|
EmitWord(outBuf, ColorTo565(maxColor));
|
||||||
EmitWord(outBuf, ColorTo565(minColor));
|
EmitWord(outBuf, ColorTo565(minColor));
|
||||||
EmitColorIndices(block, outBuf, minColor, maxColor);
|
EmitColorIndices(block, outBuf, minColor, maxColor);
|
||||||
|
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
|
||||||
}
|
}
|
||||||
|
startX = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,18 +66,23 @@ namespace DXTC
|
||||||
// 4-byte RGBA format. The width and height parameters specify the size of the image in pixels.
|
// 4-byte RGBA format. The width and height parameters specify the size of the image in pixels.
|
||||||
// The buffer pointed to by outBuf should be large enough to store the compressed image. This
|
// The buffer pointed to by outBuf should be large enough to store the compressed image. This
|
||||||
// implementation has a 4:1 compression ratio.
|
// implementation has a 4:1 compression ratio.
|
||||||
void CompressImageDXT5(const CompressionJob &cj) {
|
void CompressImageDXT5(const FasTC::CompressionJob &cj) {
|
||||||
uint8 block[64];
|
uint8 block[64];
|
||||||
uint8 minColor[4];
|
uint8 minColor[4];
|
||||||
uint8 maxColor[4];
|
uint8 maxColor[4];
|
||||||
|
|
||||||
uint8 *outBuf = cj.OutBuf();
|
const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_DXT5);
|
||||||
const uint8 *inBuf = cj.InBuf();
|
const uint32 startBlock = cj.CoordsToBlockIdx(cj.XStart(), cj.YStart());
|
||||||
for(int j = 0; j < cj.Height(); j += 4, inBuf += cj.Width() * 4 * 4)
|
uint8 *outBuf = cj.OutBuf() + startBlock * kBlockSz;
|
||||||
{
|
|
||||||
for(int i = 0; i < cj.Width(); i += 4)
|
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
|
||||||
{
|
uint32 startX = cj.XStart();
|
||||||
ExtractBlock(inBuf + i * 4, cj.Width(), block);
|
bool done = false;
|
||||||
|
for(uint32 j = cj.YStart(); !done; j += 4) {
|
||||||
|
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
|
||||||
|
|
||||||
|
const uint32 kOffset = j*cj.Width() + i;
|
||||||
|
ExtractBlock(inPixels + kOffset, cj.Width(), block);
|
||||||
GetMinMaxColorsWithAlpha(block, minColor, maxColor);
|
GetMinMaxColorsWithAlpha(block, minColor, maxColor);
|
||||||
EmitByte(outBuf, maxColor[3]);
|
EmitByte(outBuf, maxColor[3]);
|
||||||
EmitByte(outBuf, minColor[3]);
|
EmitByte(outBuf, minColor[3]);
|
||||||
|
@ -78,6 +90,7 @@ namespace DXTC
|
||||||
EmitWord(outBuf, ColorTo565(maxColor));
|
EmitWord(outBuf, ColorTo565(maxColor));
|
||||||
EmitWord(outBuf, ColorTo565(minColor));
|
EmitWord(outBuf, ColorTo565(minColor));
|
||||||
EmitColorIndices(block, outBuf, minColor, maxColor);
|
EmitColorIndices(block, outBuf, minColor, maxColor);
|
||||||
|
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -115,12 +128,12 @@ namespace DXTC
|
||||||
|
|
||||||
// Extract a 4 by 4 block of pixels from inPtr and store it in colorBlock. The width parameter
|
// Extract a 4 by 4 block of pixels from inPtr and store it in colorBlock. The width parameter
|
||||||
// specifies the size of the image in pixels.
|
// specifies the size of the image in pixels.
|
||||||
void ExtractBlock(const uint8* inPtr, uint32 width, uint8* colorBlock)
|
void ExtractBlock(const uint32* inPtr, uint32 width, uint8* colorBlock)
|
||||||
{
|
{
|
||||||
for(int j = 0; j < 4; j++)
|
for(int j = 0; j < 4; j++)
|
||||||
{
|
{
|
||||||
memcpy(&colorBlock[j * 4 * 4], inPtr, 4 * 4);
|
memcpy(&colorBlock[j * 4 * 4], inPtr, 4 * 4);
|
||||||
inPtr += width * 4;
|
inPtr += width;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,7 +142,7 @@ namespace DXTC
|
||||||
// channel.
|
// channel.
|
||||||
void GetMinMaxColors(const uint8* colorBlock, uint8* minColor, uint8* maxColor)
|
void GetMinMaxColors(const uint8* colorBlock, uint8* minColor, uint8* maxColor)
|
||||||
{
|
{
|
||||||
int32 i;
|
uint32 i;
|
||||||
uint8 inset[3];
|
uint8 inset[3];
|
||||||
|
|
||||||
minColor[0] = minColor[1] = minColor[2] = 255;
|
minColor[0] = minColor[1] = minColor[2] = 255;
|
||||||
|
@ -177,7 +190,7 @@ namespace DXTC
|
||||||
// the extents of the bounding box of the color space. This function includes the alpha channel.
|
// the extents of the bounding box of the color space. This function includes the alpha channel.
|
||||||
void GetMinMaxColorsWithAlpha(const uint8* colorBlock, uint8* minColor, uint8* maxColor)
|
void GetMinMaxColorsWithAlpha(const uint8* colorBlock, uint8* minColor, uint8* maxColor)
|
||||||
{
|
{
|
||||||
int32 i;
|
uint32 i;
|
||||||
uint8 inset[4];
|
uint8 inset[4];
|
||||||
|
|
||||||
minColor[0] = minColor[1] = minColor[2] = minColor[3] = 255;
|
minColor[0] = minColor[1] = minColor[2] = minColor[3] = 255;
|
||||||
|
@ -299,7 +312,7 @@ namespace DXTC
|
||||||
|
|
||||||
colorBlock += 3;
|
colorBlock += 3;
|
||||||
|
|
||||||
for(int i = 0; i < 16; i++) {
|
for(uint32 i = 0; i < 16; i++) {
|
||||||
uint8 a = colorBlock[i * 4];
|
uint8 a = colorBlock[i * 4];
|
||||||
int32 b1 = (a <= ab1);
|
int32 b1 = (a <= ab1);
|
||||||
int32 b2 = (a <= ab2);
|
int32 b2 = (a <= ab2);
|
||||||
|
|
|
@ -90,7 +90,7 @@ namespace DXTC
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DecompressDXT1(const DecompressionJob &dcj)
|
void DecompressDXT1(const FasTC::DecompressionJob &dcj)
|
||||||
{
|
{
|
||||||
assert(!(dcj.Height() & 3));
|
assert(!(dcj.Height() & 3));
|
||||||
assert(!(dcj.Width() & 3));
|
assert(!(dcj.Width() & 3));
|
||||||
|
@ -98,13 +98,13 @@ namespace DXTC
|
||||||
uint32 blockW = dcj.Width() >> 2;
|
uint32 blockW = dcj.Width() >> 2;
|
||||||
uint32 blockH = dcj.Height() >> 2;
|
uint32 blockH = dcj.Height() >> 2;
|
||||||
|
|
||||||
const uint32 blockSz = 8;
|
const uint32 blockSz = GetBlockSize(FasTC::eCompressionFormat_DXT1);
|
||||||
|
|
||||||
uint32 *outPixels = reinterpret_cast<uint32 *>(dcj.OutBuf());
|
uint32 *outPixels = reinterpret_cast<uint32 *>(dcj.OutBuf());
|
||||||
|
|
||||||
uint32 outBlock[16];
|
uint32 outBlock[16];
|
||||||
for(int j = 0; j < blockH; j++) {
|
for(uint32 j = 0; j < blockH; j++) {
|
||||||
for(int i = 0; i < blockW; i++) {
|
for(uint32 i = 0; i < blockW; i++) {
|
||||||
|
|
||||||
uint32 offset = (j * blockW + i) * blockSz;
|
uint32 offset = (j * blockW + i) * blockSz;
|
||||||
DecompressDXT1Block(dcj.InBuf() + offset, outBlock);
|
DecompressDXT1Block(dcj.InBuf() + offset, outBlock);
|
||||||
|
|
|
@ -61,13 +61,13 @@ namespace ETCC {
|
||||||
// Takes a stream of compressed ETC1 data and decompresses it into R8G8B8A8
|
// Takes a stream of compressed ETC1 data and decompresses it into R8G8B8A8
|
||||||
// format. The width and height must be specified in order to properly
|
// format. The width and height must be specified in order to properly
|
||||||
// decompress the data.
|
// decompress the data.
|
||||||
void Decompress(const DecompressionJob &);
|
void Decompress(const FasTC::DecompressionJob &);
|
||||||
|
|
||||||
// Takes a stream of uncompressed RGBA8 data and compresses it into ETC1
|
// Takes a stream of uncompressed RGBA8 data and compresses it into ETC1
|
||||||
// version one. The width and height must be specified in order to properly
|
// version one. The width and height must be specified in order to properly
|
||||||
// compress the data. This uses the library created by Rich Geldreich found here:
|
// compress the data. This uses the library created by Rich Geldreich found here:
|
||||||
// https://code.google.com/p/rg-etc1
|
// https://code.google.com/p/rg-etc1
|
||||||
void Compress_RG(const CompressionJob &);
|
void Compress_RG(const FasTC::CompressionJob &);
|
||||||
|
|
||||||
} // namespace ETCC
|
} // namespace ETCC
|
||||||
|
|
||||||
|
|
|
@ -52,32 +52,37 @@
|
||||||
|
|
||||||
#include "rg_etc1.h"
|
#include "rg_etc1.h"
|
||||||
#include "ETCCompressor.h"
|
#include "ETCCompressor.h"
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
namespace ETCC {
|
namespace ETCC {
|
||||||
|
|
||||||
void Compress_RG(const CompressionJob &cj) {
|
void Compress_RG(const FasTC::CompressionJob &cj) {
|
||||||
|
|
||||||
rg_etc1::etc1_pack_params params;
|
rg_etc1::etc1_pack_params params;
|
||||||
params.m_quality = rg_etc1::cLowQuality;
|
params.m_quality = rg_etc1::cLowQuality;
|
||||||
rg_etc1::pack_etc1_block_init();
|
rg_etc1::pack_etc1_block_init();
|
||||||
|
|
||||||
// Assume block-stream order
|
const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_ETC1);
|
||||||
uint32 blockSizeX = cj.Width() / 4;
|
const uint32 startBlock = cj.CoordsToBlockIdx(cj.XStart(), cj.YStart());
|
||||||
uint32 blockSizeY = cj.Height() / 4;
|
uint8 *outBuf = cj.OutBuf() + startBlock * kBlockSz;
|
||||||
|
uint32 startX = cj.XStart();
|
||||||
|
bool done = false;
|
||||||
|
for(uint32 j = cj.YStart(); !done; j += 4) {
|
||||||
|
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
|
||||||
|
|
||||||
for(uint32 j = 0; j < blockSizeY; j++)
|
uint32 pixels[16];
|
||||||
for(uint32 i = 0; i < blockSizeX; i++) {
|
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
|
||||||
uint32 pixels[16];
|
memcpy(pixels, inPixels + j*cj.Width() + i, 4 * sizeof(uint32));
|
||||||
uint32 blockIdx = j*blockSizeX + i;
|
memcpy(pixels + 4, inPixels + (j+1)*cj.Width() + i, 4 * sizeof(uint32));
|
||||||
|
memcpy(pixels + 8, inPixels + (j+2)*cj.Width() + i, 4 * sizeof(uint32));
|
||||||
|
memcpy(pixels + 12, inPixels + (j+3)*cj.Width() + i, 4 * sizeof(uint32));
|
||||||
|
|
||||||
for(uint32 y = 0; y < 4; y++) {
|
pack_etc1_block(outBuf, pixels, params);
|
||||||
for(uint32 x = 0; x < 4; x++) {
|
|
||||||
const uint32 *in = reinterpret_cast<const uint32 *>(cj.InBuf());
|
outBuf += kBlockSz;
|
||||||
pixels[y*4 + x] = in[(j*4 + y)*cj.Width() + (i*4 + x)];
|
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
startX = 0;
|
||||||
pack_etc1_block(cj.OutBuf() + blockIdx * 8, pixels, params);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace ETCC
|
} // namespace ETCC
|
||||||
|
|
|
@ -55,7 +55,7 @@
|
||||||
|
|
||||||
namespace ETCC {
|
namespace ETCC {
|
||||||
|
|
||||||
void Decompress(const DecompressionJob &cj) {
|
void Decompress(const FasTC::DecompressionJob &cj) {
|
||||||
|
|
||||||
uint32 blocksX = cj.Width() / 4;
|
uint32 blocksX = cj.Width() / 4;
|
||||||
uint32 blocksY = cj.Height() / 4;
|
uint32 blocksY = cj.Height() / 4;
|
||||||
|
|
|
@ -69,7 +69,7 @@ namespace PVRTCC {
|
||||||
// Takes a stream of compressed PVRTC data and decompresses it into R8G8B8A8
|
// Takes a stream of compressed PVRTC data and decompresses it into R8G8B8A8
|
||||||
// format. The width and height must be specified in order to properly
|
// format. The width and height must be specified in order to properly
|
||||||
// decompress the data.
|
// decompress the data.
|
||||||
void Decompress(const DecompressionJob &,
|
void Decompress(const FasTC::DecompressionJob &,
|
||||||
bool bTwoBitMode = false,
|
bool bTwoBitMode = false,
|
||||||
const EWrapMode wrapMode = eWrapMode_Wrap,
|
const EWrapMode wrapMode = eWrapMode_Wrap,
|
||||||
bool bDebugImages = false);
|
bool bDebugImages = false);
|
||||||
|
@ -77,12 +77,12 @@ namespace PVRTCC {
|
||||||
// Takes a stream of uncompressed RGBA8 data and compresses it into PVRTC
|
// Takes a stream of uncompressed RGBA8 data and compresses it into PVRTC
|
||||||
// version one. The width and height must be specified in order to properly
|
// version one. The width and height must be specified in order to properly
|
||||||
// compress the data.
|
// compress the data.
|
||||||
void Compress(const CompressionJob &,
|
void Compress(const FasTC::CompressionJob &,
|
||||||
bool bTwoBitMode = false,
|
bool bTwoBitMode = false,
|
||||||
const EWrapMode wrapMode = eWrapMode_Wrap);
|
const EWrapMode wrapMode = eWrapMode_Wrap);
|
||||||
|
|
||||||
#ifdef PVRTEXLIB_FOUND
|
#ifdef PVRTEXLIB_FOUND
|
||||||
void CompressPVRLib(const CompressionJob &,
|
void CompressPVRLib(const FasTC::CompressionJob &,
|
||||||
bool bTwoBitMode = false,
|
bool bTwoBitMode = false,
|
||||||
const EWrapMode wrapMode = eWrapMode_Wrap);
|
const EWrapMode wrapMode = eWrapMode_Wrap);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -917,7 +917,7 @@ namespace PVRTCC {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void Compress(const CompressionJob &cj, bool bTwoBit, EWrapMode wrapMode) {
|
void Compress(const FasTC::CompressionJob &cj, bool bTwoBit, EWrapMode wrapMode) {
|
||||||
const uint32 width = cj.Width();
|
const uint32 width = cj.Width();
|
||||||
const uint32 height = cj.Height();
|
const uint32 height = cj.Height();
|
||||||
|
|
||||||
|
@ -925,6 +925,11 @@ namespace PVRTCC {
|
||||||
assert((width & (width - 1)) == 0);
|
assert((width & (width - 1)) == 0);
|
||||||
assert((height & (height - 1)) == 0);
|
assert((height & (height - 1)) == 0);
|
||||||
|
|
||||||
|
// Make sure that we aren't doing any shenanigans with threading or otherwise
|
||||||
|
// assuming that we're not ending at the end of the texture...
|
||||||
|
assert(cj.XStart() == 0 && cj.YStart() == 0);
|
||||||
|
assert(cj.XEnd() == cj.Width() && cj.YEnd() == cj.Height());
|
||||||
|
|
||||||
CompressionLabel *labels =
|
CompressionLabel *labels =
|
||||||
(CompressionLabel *)calloc(width * height, sizeof(CompressionLabel));
|
(CompressionLabel *)calloc(width * height, sizeof(CompressionLabel));
|
||||||
|
|
||||||
|
|
|
@ -60,9 +60,9 @@
|
||||||
|
|
||||||
namespace PVRTCC {
|
namespace PVRTCC {
|
||||||
|
|
||||||
void CompressPVRLib(const CompressionJob &cj,
|
void CompressPVRLib(const FasTC::CompressionJob &cj,
|
||||||
bool bTwoBitMode,
|
bool bTwoBitMode,
|
||||||
const EWrapMode) {
|
const EWrapMode) {
|
||||||
pvrtexture::CPVRTextureHeader pvrTexHdr;
|
pvrtexture::CPVRTextureHeader pvrTexHdr;
|
||||||
pvrTexHdr.setPixelFormat(pvrtexture::PVRStandard8PixelType);
|
pvrTexHdr.setPixelFormat(pvrtexture::PVRStandard8PixelType);
|
||||||
pvrTexHdr.setWidth(cj.Width());
|
pvrTexHdr.setWidth(cj.Width());
|
||||||
|
|
|
@ -273,7 +273,7 @@ namespace PVRTCC {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Decompress(const DecompressionJob &dcj,
|
void Decompress(const FasTC::DecompressionJob &dcj,
|
||||||
const bool bTwoBitMode,
|
const bool bTwoBitMode,
|
||||||
const EWrapMode wrapMode,
|
const EWrapMode wrapMode,
|
||||||
bool bDebugImages) {
|
bool bDebugImages) {
|
||||||
|
|
|
@ -81,7 +81,8 @@ class ImageTester {
|
||||||
|
|
||||||
uint32 *outPixels = new uint32[w * h];
|
uint32 *outPixels = new uint32[w * h];
|
||||||
|
|
||||||
DecompressionJob dcj(data, reinterpret_cast<uint8 *>(outPixels), w, h);
|
FasTC::DecompressionJob dcj(FasTC::eCompressionFormat_PVRTC,
|
||||||
|
data, reinterpret_cast<uint8 *>(outPixels), w, h);
|
||||||
#ifdef OUTPUT_DEBUG_IMAGE
|
#ifdef OUTPUT_DEBUG_IMAGE
|
||||||
PVRTCC::Decompress(dcj, twobpp, PVRTCC::eWrapMode_Wrap, true);
|
PVRTCC::Decompress(dcj, twobpp, PVRTCC::eWrapMode_Wrap, true);
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -56,6 +56,8 @@
|
||||||
|
|
||||||
#include "PVRTCCompressor.h"
|
#include "PVRTCCompressor.h"
|
||||||
|
|
||||||
|
static const FasTC::ECompressionFormat kFmt = FasTC::eCompressionFormat_PVRTC;
|
||||||
|
|
||||||
TEST(Decompressor, DecompressWhite) {
|
TEST(Decompressor, DecompressWhite) {
|
||||||
const uint32 kWidth = 32;
|
const uint32 kWidth = 32;
|
||||||
const uint32 kHeight = 32;
|
const uint32 kHeight = 32;
|
||||||
|
@ -69,7 +71,7 @@ TEST(Decompressor, DecompressWhite) {
|
||||||
|
|
||||||
uint8 outData[4 * kWidth * kHeight];
|
uint8 outData[4 * kWidth * kHeight];
|
||||||
|
|
||||||
DecompressionJob dcj (pvrData, outData, kWidth, kHeight);
|
FasTC::DecompressionJob dcj (kFmt, pvrData, outData, kWidth, kHeight);
|
||||||
PVRTCC::Decompress(dcj);
|
PVRTCC::Decompress(dcj);
|
||||||
|
|
||||||
for(uint32 i = 0; i < kWidth; i++) {
|
for(uint32 i = 0; i < kWidth; i++) {
|
||||||
|
@ -94,7 +96,7 @@ TEST(Decompressor, DecompressGray) {
|
||||||
|
|
||||||
uint8 outData[4 * kWidth * kHeight];
|
uint8 outData[4 * kWidth * kHeight];
|
||||||
|
|
||||||
DecompressionJob dcj (pvrData, outData, kWidth, kHeight);
|
FasTC::DecompressionJob dcj (kFmt, pvrData, outData, kWidth, kHeight);
|
||||||
PVRTCC::Decompress(dcj);
|
PVRTCC::Decompress(dcj);
|
||||||
|
|
||||||
for(uint32 i = 0; i < kWidth; i++) {
|
for(uint32 i = 0; i < kWidth; i++) {
|
||||||
|
|
Loading…
Reference in a new issue