mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-23 19:51:19 +00:00
Update atomics compression algorithm
In general, we want to use this algorithm only with self-contained compression lists. As such, we've added all of the necessary synchronization primitives to the list object itself. That way, different threads that are working on the same list will be able to communicate through it. Ideally, this should reduce the number of user-space context switches that happen. Whether or not this is faster than the other synchronization algorithms that we've tried remains to be seen...
This commit is contained in:
parent
abd3961a09
commit
435f935de3
|
@ -64,6 +64,7 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
|
||||
#include "BC7Config.h"
|
||||
#include "CompressionJob.h"
|
||||
|
||||
class BlockStatManager;
|
||||
|
||||
|
@ -124,15 +125,10 @@ namespace BC7C
|
|||
#endif
|
||||
|
||||
#ifdef HAS_ATOMICS
|
||||
// This is a threadsafe version of the compression function. Once it is called on a certain block of data, it will
|
||||
// compress the entire amount of data. However, if the function is called multiple times from multiple threads then they
|
||||
// will all dispatch to compress the data that they can and the one that finishes the compression resets the function.
|
||||
//
|
||||
// The function should be used as follows:
|
||||
// for(int i = 0; i < NTHREADS; i++) {
|
||||
// startThread(function, args);
|
||||
// }
// join_threads();
|
||||
void CompressImageBC7Atomic(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height);
|
||||
// This is a threadsafe version of the compression function that is designed to compress a list of
|
||||
// textures. If this function is called with the same argument from multiple threads, they will work
|
||||
// together to compress all of the images in the list.
|
||||
void CompressAtomic(CompressionJobList &);
|
||||
#endif
|
||||
|
||||
// Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height are the dimensions of the image in pixels.
|
||||
|
|
|
@ -91,13 +91,6 @@
|
|||
#include <cfloat>
|
||||
#include <ctime>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# define ALIGN(x) __declspec( align(x) )
|
||||
#else
|
||||
# define ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
#define ALIGN_SSE ALIGN(16)
|
||||
|
||||
// #define USE_PCA_FOR_SHAPE_ESTIMATION
|
||||
|
||||
enum EBlockStats {
|
||||
|
@ -1424,6 +1417,8 @@ namespace BC7C
|
|||
// Current quality level. Starts at 50; SetQualityLevel() overwrites it (never with a
// negative value) and GetQualityLevel() returns it.
static int gQualityLevel = 50;
|
||||
// Stores the requested quality level and derives the annealing iteration budget from it.
//
// q: requested quality; negative values are stored as 0.
//
// After the call, BC7CompressionMode::MaxAnnealingIterations holds the stored quality
// level, capped at BC7CompressionMode::kMaxAnnealingIterations.
void SetQualityLevel(int q) {
  gQualityLevel = std::max(q, 0);

  // Work with a local copy of the cap and of the level that was just stored.
  const int iterationCap = BC7CompressionMode::kMaxAnnealingIterations;
  const int storedLevel = GetQualityLevel();
  BC7CompressionMode::MaxAnnealingIterations = std::min(storedLevel, iterationCap);
}
|
||||
// Returns the quality level currently held in gQualityLevel.
int GetQualityLevel() {
  return gQualityLevel;
}
|
||||
|
||||
|
@ -1556,72 +1551,33 @@ namespace BC7C
|
|||
#endif
|
||||
|
||||
// Variables used for synchronization in threadsafe implementation.
|
||||
static ALIGN(32) uint32 _currentBlock = 0;
|
||||
static ALIGN(32) uint32 _initialized = 0;
|
||||
static const unsigned char *_inBuf;
|
||||
static unsigned char *_outBuf;
|
||||
static bool _initializedFlag = false;
|
||||
// Cooperatively compresses every job in cjl. Several threads may call this with the same
// list; each call keeps claiming 16-pixel blocks of the job at the list's current job
// index until the index runs past the last job.
//
// cjl: the job list. Besides the jobs it carries the shared state used for coordination:
//      m_CurrentJobIndex, m_CurrentBlockIndex and one finished flag per job.
//
// NOTE(review): m_CurrentJobIndex and m_CurrentBlockIndex are read and written with plain
// loads/stores below. This assumes their declarations make those accesses visible to the
// other threads -- confirm against CompressionJob.h.
void CompressAtomic(CompressionJobList &cjl) {

  uint32 jobIdx;
  while((jobIdx = cjl.m_CurrentJobIndex) < cjl.GetNumJobs()) {

    // !HACK! ... Microsoft has this defined
    #undef GetJob

    const CompressionJob *cj = cjl.GetJob(jobIdx);
    const uint32 nBlocks = (cj->height * cj->width) / 16;

    // Claim block indices from the shared cursor until it runs past the end of this job.
    // Every block reads 64 input bytes and writes 16 output bytes.
    uint32 blockIdx;
    while((blockIdx = FetchAndAdd(&(cjl.m_CurrentBlockIndex))) < nBlocks) {
      unsigned char *out = cj->outBuf + (16 * blockIdx);
      const unsigned char *in = cj->inBuf + (64 * blockIdx);

      CompressBC7Block((const uint32 *)in, out);
    }

    // Exactly one thread may advance the list: the first one to set this job's finished
    // flag. TestAndSet is taken to yield zero only for that first caller
    // (NOTE(review): confirm against its definition). The block cursor is rewound before
    // the job index is published so that a thread released by the new index does not
    // claim from the previous job's exhausted cursor.
    if(!TestAndSet(cjl.GetFinishedFlag(jobIdx))) {
      cjl.m_CurrentBlockIndex = 0;
      cjl.m_CurrentJobIndex++;
    }

    // Wait until this texture finishes, i.e. until the job index has moved on.
    // This is a comparison; an assignment here would overwrite the shared index.
    while(cjl.m_CurrentJobIndex == jobIdx);
  }
}
|
||||
#endif // HAS_ATOMICS
|
||||
|
|
|
@ -45,10 +45,10 @@
|
|||
#define _TEX_COMP_H_
|
||||
|
||||
#include "CompressedImage.h"
|
||||
#include "CompressionJob.h"
|
||||
|
||||
// Forward declarations
|
||||
class ImageFile;
|
||||
class CompressedImage;
|
||||
class BlockStatManager;
|
||||
|
||||
struct SCompressionSettings {
|
||||
|
|
|
@ -105,11 +105,6 @@ static CompressionFunc ChooseFuncFromSettings(const SCompressionSettings &s) {
|
|||
return BC7C::CompressImageBC7SIMD;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ATOMICS
|
||||
if(s.bUseAtomics)
|
||||
return BC7C::CompressImageBC7Atomic;
|
||||
#endif
|
||||
return BC7C::CompressImageBC7;
|
||||
}
|
||||
break;
|
||||
|
@ -162,38 +157,30 @@ static double CompressImageInSerial(
|
|||
}
|
||||
|
||||
class AtomicThreadUnit : public TCCallable {
|
||||
const unsigned char *const m_InBuf;
|
||||
unsigned char *m_OutBuf;
|
||||
const unsigned int m_Height;
|
||||
const unsigned int m_Width;
|
||||
CompressionJobList &m_CompressionJobList;
|
||||
TCBarrier *m_Barrier;
|
||||
const unsigned int m_NumCompressions;
|
||||
CompressionFunc m_CmpFnc;
|
||||
|
||||
public:
|
||||
AtomicThreadUnit(
|
||||
const unsigned char *const inBuf,
|
||||
unsigned char *outBuf,
|
||||
const unsigned int height,
|
||||
const unsigned int width,
|
||||
CompressionJobList &_cjl,
|
||||
TCBarrier *barrier,
|
||||
const unsigned int nCompressions,
|
||||
CompressionFunc f
|
||||
) : TCCallable(),
|
||||
m_InBuf(inBuf),
|
||||
m_OutBuf(outBuf),
|
||||
m_Height(height),
|
||||
m_Width(width),
|
||||
m_CompressionJobList(_cjl),
|
||||
m_Barrier(barrier),
|
||||
m_NumCompressions(nCompressions),
|
||||
m_CmpFnc(f)
|
||||
{ }
|
||||
|
||||
virtual ~AtomicThreadUnit() { }
|
||||
virtual void operator()() {
|
||||
m_Barrier->Wait();
|
||||
for(uint32 i = 0; i < m_NumCompressions; i++)
|
||||
(*m_CmpFnc)(m_InBuf, m_OutBuf, m_Width, m_Height);
|
||||
if(m_CmpFnc == BC7C::Compress) {
|
||||
BC7C::CompressAtomic(m_CompressionJobList);
|
||||
}
|
||||
else {
|
||||
assert(!"I don't know what we're compressing...");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -205,23 +192,35 @@ static double CompressImageWithAtomics(
|
|||
) {
|
||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||
|
||||
// Setup compression list...
|
||||
const int nTimes = settings.iNumCompressions;
|
||||
CompressionJobList cjl (nTimes);
|
||||
for(int i = 0; i < nTimes; i++) {
|
||||
if(!cjl.AddJob(CompressionJob(imgData, outBuf, height, width))) {
|
||||
assert(!"Error adding compression job to job list!");
|
||||
}
|
||||
}
|
||||
|
||||
const int nThreads = settings.iNumThreads;
|
||||
|
||||
// Allocate resources...
|
||||
TCBarrier barrier (nThreads);
|
||||
TCBarrier barrier (nThreads+1);
|
||||
TCThread **threads = (TCThread **)malloc(nThreads * sizeof(TCThread *));
|
||||
AtomicThreadUnit **units = (AtomicThreadUnit **)malloc(nThreads * sizeof(AtomicThreadUnit *));
|
||||
|
||||
// Launch threads...
|
||||
StopWatch sw;
|
||||
sw.Start();
|
||||
for(int i = 0; i < nThreads; i++) {
|
||||
AtomicThreadUnit *u = new AtomicThreadUnit(imgData, outBuf, height, width, &barrier, nTimes, f);
|
||||
AtomicThreadUnit *u = new AtomicThreadUnit(cjl, &barrier, f);
|
||||
threads[i] = new TCThread(*u);
|
||||
units[i] = u;
|
||||
}
|
||||
|
||||
// Wait here to make sure that our timer is correct...
|
||||
barrier.Wait();
|
||||
|
||||
StopWatch sw;
|
||||
sw.Start();
|
||||
|
||||
// Wait for threads to finish
|
||||
for(int i = 0; i < nThreads; i++) {
|
||||
threads[i]->Join();
|
||||
|
|
Loading…
Reference in a new issue