mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-23 18:31:02 +00:00
Add first pass of atomic implementation.
This is a first pass of what I believe to be a not too terrible implementation of a cooperative thread-based compressor. The idea is simple: if a compressor is invoked with the same parameters on multiple threads, then the threads cooperate via an atomic counter to compress the texture. Each thread can take as long as it needs until the texture is finished. If a caller calls a compression routine that has different parameters, then it will help the current compression finish before starting on its own compression. In this way, we can split the textures up among the threads and guarantee that we maximize the resource usage between them. I.e. this becomes more efficient:
Thread 1: | Thread 2: | Thread N:
tex0 | texN | tex(N-1)N
tex1 | texN+1 | tex(N-1)(N+1)
.. | .. | ..
texN-1 | tex2N | tex(N-1)N
I have not tested this for bugs, so I'm still not completely convinced that it is deadlock-free, although it should be...
This commit is contained in:
parent
4d6e75ab97
commit
53fe825e49
|
@ -123,6 +123,18 @@ namespace BC7C
|
|||
void CompressImageBC7SIMD(const unsigned char* inBuf, unsigned char* outBuf, unsigned int width, unsigned int height);
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ATOMICS
|
||||
// This is a threadsafe version of the compression function. Once it is called on a certain block of data, it will
|
||||
// compress the entire amount of data. However, if the function is called multiple times from multiple threads then they
|
||||
// will all dispatch to compress the data that they can and the one that finishes the compression resets the function.
|
||||
//
|
||||
// The function should be used as follows:
|
||||
// for(int i = 0; i < NTHREADS; i++) {
|
||||
// startThread(function, args);
|
||||
// }
// join_threads();
|
||||
void CompressImageBC7Atomic(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height);
|
||||
#endif
|
||||
|
||||
// Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height are the dimensions of the image in pixels.
|
||||
void DecompressImageBC7(const unsigned char* inBuf, unsigned char* outBuf, unsigned int width, unsigned int height);
|
||||
}
|
||||
|
|
|
@ -1531,6 +1531,104 @@ namespace BC7C
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_ATOMICS
|
||||
#ifdef HAS_MSVC_ATOMICS
|
||||
// MSVC flavour of the test-and-set primitive: atomically writes 1 to *x and
// hands back whatever value *x held just before the write. A zero result
// therefore means the caller is the one that set the flag.
static uint32 TestAndSet(uint32 *x) {
  const uint32 previousValue = InterlockedExchange(x, 1);
  return previousValue;
}
|
||||
|
||||
// MSVC flavour of fetch-and-add: atomically increments *x by one and returns
// the value *x held BEFORE the increment, matching the GCC implementation
// (__sync_fetch_and_add) that the rest of this file is written against.
//
// InterlockedIncrement reports the value AFTER the increment, so one is
// subtracted to recover the pre-increment value. Without this, the first
// caller would receive 1 instead of 0 and block 0 would never be handed out.
static uint32 FetchAndAdd(uint32 *x) {
  return InterlockedIncrement(x) - 1;
}
|
||||
|
||||
static void ResetTestAndSet(uint *x) {
|
||||
*x = 0;
|
||||
}
|
||||
#elif defined HAS_GCC_ATOMICS
|
||||
// GCC flavour of the test-and-set primitive: atomically writes 1 to *x and
// hands back the value that was stored there beforehand. A zero result means
// the caller is the one that set the flag.
static uint32 TestAndSet(uint32 *x) {
  const uint32 previousValue = __sync_lock_test_and_set(x, 1);
  return previousValue;
}
|
||||
|
||||
// GCC flavour of fetch-and-add: atomically increments *x by one and returns
// the value *x held before the increment.
static uint32 FetchAndAdd(uint32 *x) {
  const uint32 valueBeforeAdd = __sync_fetch_and_add(x, 1);
  return valueBeforeAdd;
}
|
||||
|
||||
// GCC flavour of the flag release: clears the flag previously raised by
// TestAndSet, using the builtin that pairs with __sync_lock_test_and_set.
static void ResetTestAndSet(uint32 *x) {
  uint32 *const flag = x;
  __sync_lock_release(flag);
}
|
||||
#endif
|
||||
|
||||
// Variables used for synchronization in threadsafe implementation.
|
||||
static ALIGN(32) uint32 _currentBlock = 0;
|
||||
static ALIGN(32) uint32 _initialized = 0;
|
||||
static const unsigned char *_inBuf;
|
||||
static unsigned char *_outBuf;
|
||||
static bool _initializedFlag = false;
|
||||
|
||||
void CompressImageBC7Atomic(
|
||||
const unsigned char *inBuf,
|
||||
unsigned char *outBuf,
|
||||
unsigned int width,
|
||||
unsigned int height
|
||||
) {
|
||||
|
||||
bool myData = false;
|
||||
while(!myData) {
|
||||
|
||||
// Have we initialized any data?
|
||||
if(!TestAndSet(&_initialized)) {
|
||||
|
||||
// I'm the first one here... initialize MY data...
|
||||
|
||||
const int kMaxIters = BC7CompressionMode::kMaxAnnealingIterations;
|
||||
BC7CompressionMode::MaxAnnealingIterations = min(kMaxIters, GetQualityLevel());
|
||||
|
||||
_currentBlock = 0;
|
||||
|
||||
_inBuf = inBuf;
|
||||
_outBuf = outBuf;
|
||||
myData = true;
|
||||
|
||||
_initializedFlag = true;
|
||||
}
|
||||
|
||||
// We've initialized data... is it mine?
|
||||
else if(_inBuf == inBuf && _outBuf == outBuf) {
|
||||
myData = true;
|
||||
}
|
||||
|
||||
const uint32 nBlocks = (height * width) / 16;
|
||||
|
||||
// Make sure that whoever is initializing data is working on it...
|
||||
while(!_initializedFlag && _currentBlock < nBlocks) {
|
||||
YieldThread();
|
||||
}
|
||||
|
||||
// Help finish whatever texture we're compressing before we start again on my work...
|
||||
uint32 blockIdx;
|
||||
while((blockIdx = FetchAndAdd(&_currentBlock)) < nBlocks) {
|
||||
unsigned char *out = _outBuf + (16 * blockIdx);
|
||||
const unsigned char *in = _inBuf + (64 * blockIdx);
|
||||
|
||||
CompressBC7Block((const uint32 *)in, out);
|
||||
YieldThread(); // Just to give other threads a chance to make some progress
|
||||
}
|
||||
|
||||
// If we've allocated someone to compress the last block, then reset the initialization...
|
||||
if(blockIdx == nBlocks) {
|
||||
_initializedFlag = false;
|
||||
ResetTestAndSet(&_initialized);
|
||||
}
|
||||
else if(blockIdx > nBlocks) {
|
||||
// Wait for last block to finish..
|
||||
while(_initialized) {
|
||||
YieldThread();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // HAS_ATOMICS
|
||||
|
||||
void CompressImageBC7Stats(
|
||||
const unsigned char *inBuf,
|
||||
unsigned char *outBuf,
|
||||
|
|
Loading…
Reference in a new issue