mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-24 13:41:00 +00:00
Fleshes out the worker queue implementation.
This commit is contained in:
parent
62ca4ffee0
commit
c7bb6170f3
|
@ -6,10 +6,13 @@
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#include "BC7Compressor.h"
|
#include "BC7Compressor.h"
|
||||||
|
#include "WorkerQueue.h"
|
||||||
#include "ThreadGroup.h"
|
#include "ThreadGroup.h"
|
||||||
|
|
||||||
#include "ImageFile.h"
|
#include "ImageFile.h"
|
||||||
#include "Image.h"
|
#include "Image.h"
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static T min(const T &a, const T &b) {
|
static T min(const T &a, const T &b) {
|
||||||
return (a < b)? a : b;
|
return (a < b)? a : b;
|
||||||
|
@ -125,12 +128,22 @@ static double CompressImageWithThreads(
|
||||||
}
|
}
|
||||||
|
|
||||||
static double CompressImageWithWorkerQueue(
|
static double CompressImageWithWorkerQueue(
|
||||||
const ImageFile &img,
|
const unsigned char *imgData,
|
||||||
|
const unsigned int imgDataSz,
|
||||||
const SCompressionSettings &settings,
|
const SCompressionSettings &settings,
|
||||||
const CompressionFunc f,
|
const CompressionFunc f,
|
||||||
unsigned char *outBuf
|
unsigned char *outBuf
|
||||||
) {
|
) {
|
||||||
return 0.0;
|
WorkerQueue wq (
|
||||||
|
settings.iNumThreads,
|
||||||
|
settings.iJobSize,
|
||||||
|
imgData,
|
||||||
|
imgDataSz,
|
||||||
|
f,
|
||||||
|
outBuf
|
||||||
|
);
|
||||||
|
|
||||||
|
wq.Run();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CompressImageData(
|
bool CompressImageData(
|
||||||
|
@ -178,7 +191,10 @@ bool CompressImageData(
|
||||||
double cmpMSTime = 0.0;
|
double cmpMSTime = 0.0;
|
||||||
|
|
||||||
if(settings.iNumThreads > 1) {
|
if(settings.iNumThreads > 1) {
|
||||||
cmpMSTime = CompressImageWithThreads(data, dataSz, settings, f, cmpData);
|
if(settings.iJobSize > 0)
|
||||||
|
cmpMSTime = CompressImageWithWorkerQueue(data, dataSz, settings, f, cmpData);
|
||||||
|
else
|
||||||
|
cmpMSTime = CompressImageWithThreads(data, dataSz, settings, f, cmpData);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
cmpMSTime = CompressImageInSerial(data, dataSz, settings, f, cmpData);
|
cmpMSTime = CompressImageInSerial(data, dataSz, settings, f, cmpData);
|
||||||
|
|
|
@ -6,9 +6,22 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include <boost/thread/thread.hpp>
|
#include <boost/thread/thread.hpp>
|
||||||
#include <boost/thread/barrier.hpp>
|
|
||||||
#include <boost/thread/mutex.hpp>
|
template <typename T>
|
||||||
#include <boost/thread/condition_variable.hpp>
|
static inline T max(const T &a, const T &b) {
|
||||||
|
return (a > b)? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static inline T min(const T &a, const T &b) {
|
||||||
|
return (a < b)? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static inline void clamp(T &x, const T &min, const T &max) {
|
||||||
|
if(x < min) x = min;
|
||||||
|
else if(x > max) x = max;
|
||||||
|
}
|
||||||
|
|
||||||
WorkerThread::WorkerThread(WorkerQueue * parent, uint32 idx)
|
WorkerThread::WorkerThread(WorkerQueue * parent, uint32 idx)
|
||||||
: m_ThreadIdx(idx)
|
: m_ThreadIdx(idx)
|
||||||
|
@ -28,159 +41,131 @@ void WorkerThread::operator()() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while(1) {
|
||||||
|
|
||||||
|
EAction action = m_Parent->AcceptThreadData(m_ThreadIdx);
|
||||||
|
if(eAction_Quit == action) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
|
||||||
|
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
|
||||||
|
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_Parent->NotifyWorkerFinished();
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
WorkerQueue::WorkerQueue(
|
||||||
ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf )
|
uint32 numThreads,
|
||||||
: m_StartBarrier(new boost::barrier(numThreads + 1))
|
uint32 jobSize,
|
||||||
, m_FinishMutex(new boost::mutex())
|
const uint8 *inBuf,
|
||||||
, m_FinishCV(new boost::condition_variable())
|
uint32 inBufSz,
|
||||||
, m_NumThreads(numThreads)
|
CompressionFunc func,
|
||||||
|
uint8 *outBuf
|
||||||
|
)
|
||||||
|
: m_NumThreads(numThreads)
|
||||||
, m_ActiveThreads(0)
|
, m_ActiveThreads(0)
|
||||||
, m_Func(func)
|
, m_JobSize(max(uint32(1), jobSize))
|
||||||
, m_ImageDataSz(inBufSz)
|
, m_InBufSz(inBufSz)
|
||||||
, m_ImageData(inBuf)
|
, m_InBuf(inBuf)
|
||||||
, m_OutBuf(outBuf)
|
, m_OutBuf(outBuf)
|
||||||
, m_ThreadState(eThreadState_Done)
|
, m_CompressionFunc(func)
|
||||||
, m_ExitFlag(false)
|
|
||||||
{
|
{
|
||||||
for(int i = 0; i < kMaxNumThreads; i++) {
|
clamp(m_NumThreads, uint32(1), kMaxNumWorkerThreads);
|
||||||
// Thread synchronization primitives
|
|
||||||
m_Threads[i].m_ParentCounterLock = m_FinishMutex;
|
#ifndef NDEBUG
|
||||||
m_Threads[i].m_FinishCV = m_FinishCV;
|
if(m_InBufSz % 64) {
|
||||||
m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
|
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?");
|
||||||
m_Threads[i].m_StartBarrier = m_StartBarrier;
|
|
||||||
m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
ThreadGroup::~ThreadGroup() {
|
|
||||||
delete m_StartBarrier;
|
|
||||||
delete m_FinishMutex;
|
|
||||||
delete m_FinishCV;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int ThreadGroup::GetCompressedBlockSize() {
|
|
||||||
if(m_Func == BC7C::CompressImageBC7) return 16;
|
|
||||||
#ifdef HAS_SSE_41
|
|
||||||
if(m_Func == BC7C::CompressImageBC7SIMD) return 16;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int ThreadGroup::GetUncompressedBlockSize() {
|
void WorkerQueue::Run() {
|
||||||
if(m_Func == BC7C::CompressImageBC7) return 64;
|
|
||||||
#ifdef HAS_SSE_41
|
|
||||||
if(m_Func == BC7C::CompressImageBC7SIMD) return 64;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ThreadGroup::PrepareThreads() {
|
// Spawn a bunch of threads...
|
||||||
|
boost::unique_lock<boost::mutex> lock(m_Mutex);
|
||||||
// Make sure that threads aren't running.
|
|
||||||
if(m_ThreadState != eThreadState_Done) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Have we already activated the thread group?
|
|
||||||
if(m_ActiveThreads > 0) {
|
|
||||||
m_ThreadState = eThreadState_Waiting;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We can assume that the image data is in block stream order
|
|
||||||
// so, the size of the data given to each thread will be (nb*4)x4
|
|
||||||
int numBlocks = m_ImageDataSz / 64;
|
|
||||||
|
|
||||||
int blocksProcessed = 0;
|
|
||||||
int blocksPerThread = (numBlocks/m_NumThreads) + ((numBlocks % m_NumThreads)? 1 : 0);
|
|
||||||
|
|
||||||
// Currently no threads are finished...
|
|
||||||
m_ThreadsFinished = 0;
|
|
||||||
for(int i = 0; i < m_NumThreads; i++) {
|
for(int i = 0; i < m_NumThreads; i++) {
|
||||||
|
WorkerThread t (this, i);
|
||||||
if(m_ActiveThreads >= kMaxNumThreads)
|
|
||||||
break;
|
|
||||||
|
|
||||||
int numBlocksThisThread = blocksPerThread;
|
|
||||||
if(blocksProcessed + numBlocksThisThread > numBlocks) {
|
|
||||||
numBlocksThisThread = numBlocks - blocksProcessed;
|
|
||||||
}
|
|
||||||
|
|
||||||
CmpThread &t = m_Threads[m_ActiveThreads];
|
|
||||||
t.m_Height = 4;
|
|
||||||
t.m_Width = numBlocksThisThread * 4;
|
|
||||||
t.m_CmpFunc = m_Func;
|
|
||||||
t.m_OutBuf = m_OutBuf + (blocksProcessed * GetCompressedBlockSize());
|
|
||||||
t.m_InBuf = m_ImageData + (blocksProcessed * GetUncompressedBlockSize());
|
|
||||||
|
|
||||||
blocksProcessed += numBlocksThisThread;
|
|
||||||
|
|
||||||
m_ThreadHandles[m_ActiveThreads] = new boost::thread(t);
|
m_ThreadHandles[m_ActiveThreads] = new boost::thread(t);
|
||||||
|
|
||||||
m_ActiveThreads++;
|
m_ActiveThreads++;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_ThreadState = eThreadState_Waiting;
|
// Wait for them to finish...
|
||||||
return true;
|
while(m_ActiveThreads > 0) {
|
||||||
}
|
m_CV.wait(lock);
|
||||||
|
|
||||||
bool ThreadGroup::Start() {
|
|
||||||
|
|
||||||
if(m_ActiveThreads <= 0) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(m_ThreadState != eThreadState_Waiting) {
|
// Join them all together..
|
||||||
return false;
|
for(int i = 0; i < m_NumThreads; i++) {
|
||||||
}
|
|
||||||
|
|
||||||
m_StopWatch.Reset();
|
|
||||||
m_StopWatch.Start();
|
|
||||||
|
|
||||||
// Last thread to activate the barrier is this one.
|
|
||||||
m_ThreadState = eThreadState_Running;
|
|
||||||
m_StartBarrier->wait();
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ThreadGroup::CleanUpThreads() {
|
|
||||||
|
|
||||||
// Are the threads currently running?
|
|
||||||
if(m_ThreadState == eThreadState_Running) {
|
|
||||||
// ... if so, wait for them to finish
|
|
||||||
Join();
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(m_ThreadState == eThreadState_Done || m_ThreadState == eThreadState_Waiting);
|
|
||||||
|
|
||||||
// Mark all threads for exit
|
|
||||||
m_ExitFlag = true;
|
|
||||||
|
|
||||||
// Hit the barrier to signal them to go.
|
|
||||||
m_StartBarrier->wait();
|
|
||||||
|
|
||||||
// Clean up.
|
|
||||||
for(int i = 0; i < m_ActiveThreads; i++) {
|
|
||||||
m_ThreadHandles[i]->join();
|
m_ThreadHandles[i]->join();
|
||||||
delete m_ThreadHandles[i];
|
delete m_ThreadHandles[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset active number of threads...
|
|
||||||
m_ActiveThreads = 0;
|
|
||||||
m_ExitFlag = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadGroup::Join() {
|
void WorkerQueue::NotifyWorkerFinished() {
|
||||||
|
{
|
||||||
|
boost::lock_guard<boost::mutex> lock(m_Mutex);
|
||||||
|
m_ActiveThreads--;
|
||||||
|
}
|
||||||
|
m_CV.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
boost::unique_lock<boost::mutex> lock(*m_FinishMutex);
|
WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
|
||||||
while(m_ThreadsFinished != m_ActiveThreads) {
|
if(threadIdx < 0 || threadIdx >= m_ActiveThreads) {
|
||||||
m_FinishCV->wait(lock);
|
return WorkerThread::eAction_Quit;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_StopWatch.Stop();
|
// How many blocks total do we have?
|
||||||
m_ThreadState = eThreadState_Done;
|
const uint32 totalBlocks = m_InBufSz / 64;
|
||||||
m_ThreadsFinished = 0;
|
|
||||||
|
// Make sure we have exclusive access...
|
||||||
|
boost::lock_guard<boost::mutex> lock(m_Mutex);
|
||||||
|
|
||||||
|
// If we've completed all blocks, then mark the thread for
|
||||||
|
// completion.
|
||||||
|
if(m_NextBlock >= totalBlocks) {
|
||||||
|
return WorkerThread::eAction_Quit;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, this thread's offset is the current block...
|
||||||
|
m_Offsets[threadIdx] = m_NextBlock;
|
||||||
|
|
||||||
|
// The number of blocks to process is either the job size
|
||||||
|
// or the number of blocks remaining.
|
||||||
|
int blocksProcessed = min(m_JobSize, totalBlocks - m_NextBlock);
|
||||||
|
m_NumBlocks[threadIdx] = blocksProcessed;
|
||||||
|
|
||||||
|
// Make sure the next block is updated.
|
||||||
|
m_NextBlock += blocksProcessed;
|
||||||
|
|
||||||
|
return WorkerThread::eAction_DoWork;
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint8 *WorkerQueue::GetSrcForThread(const int threadIdx) const {
|
||||||
|
assert(m_Offsets[threadIdx] >= 0);
|
||||||
|
assert(threadIdx >= 0);
|
||||||
|
assert(threadIdx < m_NumThreads);
|
||||||
|
|
||||||
|
const uint32 inBufBlockSz = 16 * 4;
|
||||||
|
return m_InBuf + m_Offsets[threadIdx] * inBufBlockSz;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8 *WorkerQueue::GetDstForThread(const int threadIdx) const {
|
||||||
|
assert(m_Offsets[threadIdx] >= 0);
|
||||||
|
assert(threadIdx >= 0);
|
||||||
|
assert(threadIdx < m_NumThreads);
|
||||||
|
|
||||||
|
const uint32 outBufBlockSz = 16;
|
||||||
|
return m_OutBuf + m_Offsets[threadIdx] * outBufBlockSz;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32 WorkerQueue::GetNumBlocksForThread(const int threadIdx) const {
|
||||||
|
assert(m_Offsets[threadIdx] >= 0);
|
||||||
|
assert(threadIdx >= 0);
|
||||||
|
assert(threadIdx < m_NumThreads);
|
||||||
|
|
||||||
|
return m_NumBlocks[threadIdx];
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,18 +1,18 @@
|
||||||
#ifndef __TEXCOMP_WORKDER_QUEUE_H__
|
#ifndef __TEXCOMP_WORKDER_QUEUE_H__
|
||||||
#define __TEXCOMP_WORKDER_QUEUE_H__
|
#define __TEXCOMP_WORKDER_QUEUE_H__
|
||||||
|
|
||||||
#include "TexCompTypes.h"
|
|
||||||
#include "TexComp.h"
|
|
||||||
|
|
||||||
// Forward declare...
|
// Forward declare...
|
||||||
class WorkerQueue;
|
class WorkerQueue;
|
||||||
namespace boost {
|
namespace boost {
|
||||||
class thread;
|
class thread;
|
||||||
class mutex;
|
|
||||||
class barrier;
|
|
||||||
class condition_variable;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Necessary includes...
|
||||||
|
#include "TexCompTypes.h"
|
||||||
|
#include "TexComp.h"
|
||||||
|
#include <boost/thread/mutex.hpp>
|
||||||
|
#include <boost/thread/condition_variable.hpp>
|
||||||
|
|
||||||
struct WorkerThread {
|
struct WorkerThread {
|
||||||
friend class WorkerQueue;
|
friend class WorkerQueue;
|
||||||
public:
|
public:
|
||||||
|
@ -20,6 +20,13 @@ public:
|
||||||
WorkerThread(WorkerQueue *, uint32 idx);
|
WorkerThread(WorkerQueue *, uint32 idx);
|
||||||
void operator ()();
|
void operator ()();
|
||||||
|
|
||||||
|
enum EAction {
|
||||||
|
eAction_DoWork,
|
||||||
|
eAction_Quit,
|
||||||
|
|
||||||
|
kNumWorkerThreadActions
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint32 m_ThreadIdx;
|
uint32 m_ThreadIdx;
|
||||||
WorkerQueue *const m_Parent;
|
WorkerQueue *const m_Parent;
|
||||||
|
@ -39,21 +46,37 @@ class WorkerQueue {
|
||||||
|
|
||||||
~WorkerQueue() { }
|
~WorkerQueue() { }
|
||||||
|
|
||||||
// Runs the
|
// Runs the workers
|
||||||
void Run();
|
void Run();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
static const int kMaxNumWorkerThreads = 256;
|
uint32 m_NumThreads;
|
||||||
int m_Offsets[kMaxNumWorkerThreads];
|
uint32 m_ActiveThreads;
|
||||||
|
uint32 m_JobSize;
|
||||||
|
uint32 m_InBufSz;
|
||||||
|
const uint8 *m_InBuf;
|
||||||
|
uint8 *m_OutBuf;
|
||||||
|
|
||||||
int GetOffsetForThread(const int threadIdx) const;
|
boost::condition_variable m_CV;
|
||||||
|
boost::mutex m_Mutex;
|
||||||
|
uint32 m_NextBlock;
|
||||||
|
|
||||||
|
static const uint32 kMaxNumWorkerThreads = 256;
|
||||||
|
uint32 m_Offsets[kMaxNumWorkerThreads];
|
||||||
|
uint32 m_NumBlocks[kMaxNumWorkerThreads];
|
||||||
|
|
||||||
|
boost::thread *m_ThreadHandles[kMaxNumWorkerThreads];
|
||||||
|
|
||||||
|
const uint8 *GetSrcForThread(const int threadIdx) const;
|
||||||
|
uint8 *GetDstForThread(const int threadIdx) const;
|
||||||
|
uint32 GetNumBlocksForThread(const int threadIdx) const;
|
||||||
|
|
||||||
const CompressionFunc m_CompressionFunc;
|
const CompressionFunc m_CompressionFunc;
|
||||||
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
|
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
|
||||||
|
|
||||||
void SignalThreadReady(int threadIdx);
|
WorkerThread::EAction AcceptThreadData(uint32 threadIdx);
|
||||||
bool AcceptThreadData(int threadIdx) const;
|
void NotifyWorkerFinished();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //__TEXCOMP_WORKDER_QUEUE_H__
|
#endif //__TEXCOMP_WORKDER_QUEUE_H__
|
||||||
|
|
Loading…
Reference in a new issue