FasTC/CLTool/main.cpp
2012-08-24 15:56:45 -04:00

2127 lines
70 KiB
C++
Executable file

//--------------------------------------------------------------------------------------
// Copyright 2011 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies. Intel makes no representations about the
// suitability of this software for any purpose. THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//
//--------------------------------------------------------------------------------------
#include "DXUT.h"
#include "DXUTcamera.h"
#include "DXUTgui.h"
#include "DXUTsettingsDlg.h"
#include "SDKmisc.h"
#include "SDKMesh.h"
#include "DXTCompressorDLL.h" // DXT compressor DLL.
#include "BC7CompressorDLL.h" // BC7 compressor DLL.
#include "StopWatch.h" // Timer.
#include "TaskMgrTBB.h" // TBB task manager.
#include <tchar.h>
#include <strsafe.h>
// Alignment helpers: prefix the wrapped declaration with MSVC's
// __declspec(align(N)) so it is placed on a 16- or 32-byte boundary.
#define ALIGN16(x) __declspec(align(16)) x
#define ALIGN32(x) __declspec(align(32)) x
// Texture compression formats selectable in the UI.
// The enumerators are used as indices into kCompressorTypeStr, so the two
// lists must stay in the same order; kNumCompressorTypes is the entry count.
enum ECompressorType
{
eCompType_DXT1,
eCompType_DXT5,
eCompType_BC7,
kNumCompressorTypes
};
// Display names, one per ECompressorType value.
const TCHAR *kCompressorTypeStr[kNumCompressorTypes] = {
_T("DXT1/BC1"),
_T("DXT5/BC3"),
_T("BC7"),
};
// Instruction sets a compressor implementation can be selected for.
// The enumerators index kInstructionSetStr; kNumInstructionSets is the count.
enum EInstructionSet
{
eInstrSet_Scalar
, eInstrSet_SSE
, eInstrSet_AVX2
, kNumInstructionSets
};
// Display names, one per EInstructionSet value.
const TCHAR *kInstructionSetStr[kNumInstructionSets] = {
_T("Scalar"),
_T("SSE"),
_T("AVX2"),
};
// Threading back ends for running the compressor.
// The enumerators index kThreadModeStr; kNumThreadModes is the count.
enum EThreadMode
{
eThreadMode_None,
eThreadMode_TBB,
eThreadMode_Win32,
kNumThreadModes
};
// Display names, one per EThreadMode value.
const TCHAR *kThreadModeStr[kNumThreadModes] = {
_T("None"),
_T("TBB"),
_T("Win32")
};
// Availability flags referenced by the scheme table below.
// wWinMain assigns g_AVX2Available (only when ENABLE_AVX2 is defined) and sets
// g_DX11Available when the D3D11 feature-level device was created successfully.
static BOOL g_DXT1Available = TRUE;
static BOOL g_AVX2Available = FALSE;
static BOOL g_DX11Available = FALSE;
// One selectable combination of compressor, instruction set and threading mode.
// availabilityOverride is a reference to one of the flags above, so an entry
// always reflects the flag's current value rather than its value at static
// initialization time.
const struct ECompressionScheme {
const ECompressorType type;
const EInstructionSet instrSet;
const EThreadMode threadMode;
const BOOL &availabilityOverride;
} kCompressionSchemes[] = {
// DXT1 and DXT5, Scalar/SSE: all three threading modes, all keyed on g_DXT1Available.
{ eCompType_DXT1, eInstrSet_Scalar, eThreadMode_None, g_DXT1Available },
{ eCompType_DXT1, eInstrSet_Scalar, eThreadMode_TBB, g_DXT1Available },
{ eCompType_DXT1, eInstrSet_Scalar, eThreadMode_Win32, g_DXT1Available },
{ eCompType_DXT1, eInstrSet_SSE, eThreadMode_None, g_DXT1Available },
{ eCompType_DXT1, eInstrSet_SSE, eThreadMode_TBB, g_DXT1Available },
{ eCompType_DXT1, eInstrSet_SSE, eThreadMode_Win32, g_DXT1Available },
{ eCompType_DXT5, eInstrSet_Scalar, eThreadMode_None, g_DXT1Available },
{ eCompType_DXT5, eInstrSet_Scalar, eThreadMode_TBB, g_DXT1Available },
{ eCompType_DXT5, eInstrSet_Scalar, eThreadMode_Win32, g_DXT1Available },
{ eCompType_DXT5, eInstrSet_SSE, eThreadMode_None, g_DXT1Available },
{ eCompType_DXT5, eInstrSet_SSE, eThreadMode_TBB, g_DXT1Available },
{ eCompType_DXT5, eInstrSet_SSE, eThreadMode_Win32, g_DXT1Available },
// BC7: keyed on g_DX11Available; the table lists no TBB and no AVX2 variant.
{ eCompType_BC7, eInstrSet_Scalar, eThreadMode_None, g_DX11Available },
{ eCompType_BC7, eInstrSet_Scalar, eThreadMode_Win32, g_DX11Available },
{ eCompType_BC7, eInstrSet_SSE, eThreadMode_None, g_DX11Available },
{ eCompType_BC7, eInstrSet_SSE, eThreadMode_Win32, g_DX11Available },
// AVX2 variants of DXT1/DXT5: keyed on g_AVX2Available.
{ eCompType_DXT1, eInstrSet_AVX2, eThreadMode_None, g_AVX2Available },
{ eCompType_DXT1, eInstrSet_AVX2, eThreadMode_TBB, g_AVX2Available },
{ eCompType_DXT1, eInstrSet_AVX2, eThreadMode_Win32, g_AVX2Available },
{ eCompType_DXT5, eInstrSet_AVX2, eThreadMode_None, g_AVX2Available },
{ eCompType_DXT5, eInstrSet_AVX2, eThreadMode_TBB, g_AVX2Available },
{ eCompType_DXT5, eInstrSet_AVX2, eThreadMode_Win32, g_AVX2Available },
};
// Number of entries in kCompressionSchemes.
const int kNumCompressionSchemes = sizeof(kCompressionSchemes) / sizeof(kCompressionSchemes[0]);
// Currently selected scheme; starts at the first table entry (DXT1, Scalar, no threading).
const ECompressionScheme *gCompressionScheme = kCompressionSchemes;
// Textured vertex.
// The member order and sizes must match the input layout declared in
// OnD3D11CreateDevice (POSITION at byte offset 0, TEXCOORD at byte offset 12).
struct Vertex
{
D3DXVECTOR3 position;
D3DXVECTOR2 texCoord;
};
// Global variables
CDXUTDialogResourceManager gDialogResourceManager; // manager for shared resources of dialogs
CD3DSettingsDlg gD3DSettingsDlg; // Device settings dialog
CDXUTDialog gHUD; // dialog holding the full screen / REF / change device buttons
CDXUTDialog gSampleUI; // dialog for sample specific controls
bool gShowHelp = false; // If true, it renders the UI control text
CDXUTTextHelper* gTxtHelper = NULL; // created in OnD3D11CreateDevice, deleted in OnD3D11DestroyDevice
double gCompTime = 0.0; // shown as "Compression Time: ... ms"
double gCompRate = 0.0; // shown as "Compression Rate: ... Mp/s"
int gBlocksPerTask = 256; // mirrored by the IDC_BLOCKSPERTASK slider
int gFrameNum = 0; // frames rendered so far, counted up to gFrameDelay
int gFrameDelay = 100; // frame number at which OnD3D11FrameRender recompresses once
int gTexWidth = 0;
int gTexHeight = 0;
double gError = 0.0; // value displayed next to kErrorStr
// Label for the error metric; selected at compile time by REPORT_RMSE.
#ifdef REPORT_RMSE
static const WCHAR *kErrorStr = L"Root Mean Squared Error";
#else
static const WCHAR *kErrorStr = L"Peak Signal/Noise Ratio";
#endif
ID3D11DepthStencilState* gDepthStencilState = NULL;
UINT gStencilReference = 0;
ID3D11InputLayout* gVertexLayout = NULL;
ID3D11Buffer* gVertexBuffer = NULL; // three side-by-side quads (18 vertices)
ID3D11Buffer* gQuadVB = NULL; // single quad (6 vertices)
ID3D11Buffer* gIndexBuffer = NULL;
ID3D11VertexShader* gVertexShader = NULL;
ID3D11PixelShader* gRenderFramePS = NULL;
ID3D11PixelShader* gRenderTexturePS = NULL;
ID3D11SamplerState* gSamPoint = NULL; // point-filtered, wrap-addressed sampler
ID3D11ShaderResourceView* gUncompressedSRV = NULL; // Shader resource view for the uncompressed texture resource.
ID3D11ShaderResourceView* gCompressedSRV = NULL; // Shader resource view for the compressed texture resource.
ID3D11ShaderResourceView* gErrorSRV = NULL; // Shader resource view for the error texture.
// Win32 thread API
// Upper bound on worker threads; sizes every per-thread array below.
const int kMaxWinThreads = 16;
// State stored in each WinThreadData entry.
// NOTE(review): the transitions between these states are implemented outside
// this part of the file; the names suggest WaitForData -> DataLoaded ->
// Running -> Done, but confirm against the worker thread code.
enum EThreadState {
eThreadState_WaitForData,
eThreadState_DataLoaded,
eThreadState_Running,
eThreadState_Done
};
// Signature of an image compression routine: reads the image in inBuf with the
// given width and height and writes its result to outBuf.
typedef void (* CompressionFunc)(const BYTE* inBuf, BYTE* outBuf, int width, int height);

// Description of one unit of work for a Win32 worker thread, one entry per
// possible thread. A default-constructed entry carries no work: it is marked
// Done, has no buffers or function, and uses -1 for its index and dimensions.
struct WinThreadData {
    EThreadState state;
    int threadIdx;
    const BYTE *inBuf;
    BYTE *outBuf;
    int width;
    int height;
    CompressionFunc cmpFunc;

    WinThreadData()
        : state(eThreadState_Done)
        , threadIdx(-1)
        , inBuf(NULL)
        , outBuf(NULL)
        , width(-1)
        , height(-1)
        , cmpFunc(NULL)
    {
    }
} gWinThreadData[kMaxWinThreads];
// Per-thread work events; wWinMain zero-fills this array at start-up.
HANDLE gWinThreadWorkEvent[kMaxWinThreads];
HANDLE gWinThreadStartEvent = NULL;
HANDLE gWinThreadDoneEvent = NULL;
int gNumWinThreads = 0;
// Overwritten in wWinMain with SYSTEM_INFO::dwNumberOfProcessors.
DWORD gNumProcessors = 1; // We have at least one processor.
DWORD dwThreadIdArray[kMaxWinThreads];
// Worker thread handles; wWinMain resets every slot to NULL at start-up.
HANDLE hThreadArray[kMaxWinThreads];
// UI control IDs
// Ids 1-3 are the gHUD buttons; the rest belong to gSampleUI (see InitApp).
#define IDC_TOGGLEFULLSCREEN 1
#define IDC_TOGGLEREF 2
#define IDC_CHANGEDEVICE 3
#define IDC_UNCOMPRESSEDTEXT 4
#define IDC_COMPRESSEDTEXT 5
#define IDC_ERRORTEXT 6
#define IDC_SIZETEXT 7
#define IDC_TIMETEXT 8
#define IDC_RATETEXT 9
// Despite its name, this combo box carries an EThreadMode value (see OnGUIEvent).
#define IDC_TBB 10
// Combo box carrying an EInstructionSet value.
#define IDC_SIMD 11
// Combo box carrying an ECompressorType value.
#define IDC_COMPRESSOR 12
#define IDC_BLOCKSPERTASKTEXT 13
#define IDC_BLOCKSPERTASK 14
#define IDC_LOADTEXTURE 15
#define IDC_RECOMPRESS 16
// Static text showing kErrorStr and gError.
#define IDC_RMSETEXT 17
// Forward declarations
// DXUT callbacks; each is registered with DXUT in wWinMain, except OnGUIEvent,
// which InitApp installs as the dialog callback.
bool CALLBACK ModifyDeviceSettings( DXUTDeviceSettings* pDeviceSettings, void* pUserContext );
void CALLBACK OnFrameMove( double fTime, float fElapsedTime, void* pUserContext );
LRESULT CALLBACK MsgProc( HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam, bool* pbNoFurtherProcessing,
void* pUserContext );
void CALLBACK OnKeyboard( UINT nChar, bool bKeyDown, bool bAltDown, void* pUserContext );
void CALLBACK OnGUIEvent( UINT nEvent, int nControlID, CDXUTControl* pControl, void* pUserContext );
bool CALLBACK IsD3D11DeviceAcceptable(const CD3D11EnumAdapterInfo *AdapterInfo, UINT Output, const CD3D11EnumDeviceInfo *DeviceInfo,
DXGI_FORMAT BackBufferFormat, bool bWindowed, void* pUserContext );
HRESULT CALLBACK OnD3D11CreateDevice( ID3D11Device* pd3dDevice, const DXGI_SURFACE_DESC* pBackBufferSurfaceDesc,
void* pUserContext );
HRESULT CALLBACK OnD3D11ResizedSwapChain( ID3D11Device* pd3dDevice, IDXGISwapChain* pSwapChain,
const DXGI_SURFACE_DESC* pBackBufferSurfaceDesc, void* pUserContext );
void CALLBACK OnD3D11ReleasingSwapChain( void* pUserContext );
void CALLBACK OnD3D11DestroyDevice( void* pUserContext );
void CALLBACK OnD3D11FrameRender( ID3D11Device* pd3dDevice, ID3D11DeviceContext* pd3dImmediateContext, double fTime,
float fElapsedTime, void* pUserContext );
// Application set-up and UI helpers.
void InitApp();
void RenderText();
void UpdateBlockSlider();
void UpdateCompressionAlgorithms();
void UpdateThreadingMode();
void UpdateCompressionModes();
void UpdateAllowedSettings();
void SetCompressionScheme(EInstructionSet instrSet, ECompressorType compType, EThreadMode threadMode);
// Texture loading, compression and error measurement.
HRESULT CreateTextures(LPTSTR file);
void DestroyTextures();
HRESULT LoadTexture(LPTSTR file);
HRESULT PadTexture(ID3D11ShaderResourceView** textureSRV);
HRESULT SaveTexture(ID3D11ShaderResourceView* textureSRV, LPTSTR file);
HRESULT CompressTexture(ID3D11ShaderResourceView* uncompressedSRV, ID3D11ShaderResourceView** compressedSRV);
HRESULT ComputeError(ID3D11ShaderResourceView* uncompressedSRV, ID3D11ShaderResourceView* compressedSRV, ID3D11ShaderResourceView** errorSRV);
HRESULT RecompressTexture();
void ComputeRMSE(const BYTE *errorData, const INT width, const INT height);
// Win32 worker thread management.
void InitWin32Threads();
void DestroyThreads();
// Depth/stencil state helpers.
void StoreDepthStencilState();
void RestoreDepthStencilState();
HRESULT DisableDepthTest();
// Compression entry points declared in namespace DXTC. All four image functions
// share the CompressionFunc parameter list (input buffer, output buffer, width,
// height).
// NOTE(review): judging by the names, the NoThread/TBB/WIN variants correspond
// to the EThreadMode values and CompressImageDXTWinThread is the Win32 thread
// procedure; the definitions are not in this part of the file - confirm there.
namespace DXTC
{
VOID CompressImageDXT(const BYTE* inBuf, BYTE* outBuf, INT width, INT height);
VOID CompressImageDXTNoThread(const BYTE* inBuf, BYTE* outBuf, INT width, INT height);
VOID CompressImageDXTTBB(const BYTE* inBuf, BYTE* outBuf, INT width, INT height);
VOID CompressImageDXTWIN(const BYTE* inBuf, BYTE* outBuf, INT width, INT height);
DWORD WINAPI CompressImageDXTWinThread( LPVOID lpParam );
}
// supports_AVX2() reports whether the AVX2 code paths may be used; it returns
// non-zero when they may. It is only declared or defined when the build
// enables AVX2 support through ENABLE_AVX2.
#ifdef ENABLE_AVX2
#ifdef _M_X64
/* On x64, we can't have inline assembly in C files, see avxtest.asm */
extern "C" int __stdcall supports_AVX2();
#else
/* Not an x64 build: AVX2 instructions require 64 bit mode, so always report
   "unsupported". The original directive here was "#else ifdef WIN32"; the
   trailing tokens were never evaluated and only produced a warning, so a
   plain #else selects exactly the same code. */
extern "C" int __stdcall supports_AVX2() {
    return 0;
}
#endif // _M_X64
#endif // ENABLE_AVX2
// Application entry point.
// Initializes the threading globals, registers the DXUT callbacks, creates the
// window and a D3D11 device (falling back to feature level 10.1 when no usable
// 11.0 driver is found), runs the DXUT main loop, then shuts down the worker
// threads. Returns DXUT's exit code.
int WINAPI wWinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLine, int nCmdShow )
{
// Enable run-time memory check for debug builds.
#if defined(DEBUG) | defined(_DEBUG)
_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
#endif
// Query AVX2 support once; this flag gates the AVX2 entries of kCompressionSchemes.
#ifdef ENABLE_AVX2
g_AVX2Available = supports_AVX2();
#endif
// Make sure that the event array is set to null...
memset(gWinThreadWorkEvent, 0, sizeof(gWinThreadWorkEvent));
// Figure out how many cores there are on this machine
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
gNumProcessors = sysinfo.dwNumberOfProcessors;
// Make sure all of our threads are empty.
for(int i = 0; i < kMaxWinThreads; i++) {
hThreadArray[i] = NULL;
}
// Set DXUT callbacks
DXUTSetCallbackDeviceChanging( ModifyDeviceSettings );
DXUTSetCallbackMsgProc( MsgProc );
DXUTSetCallbackKeyboard( OnKeyboard );
DXUTSetCallbackFrameMove( OnFrameMove );
DXUTSetCallbackD3D11DeviceAcceptable( IsD3D11DeviceAcceptable );
DXUTSetCallbackD3D11DeviceCreated( OnD3D11CreateDevice );
DXUTSetCallbackD3D11SwapChainResized( OnD3D11ResizedSwapChain );
DXUTSetCallbackD3D11FrameRender( OnD3D11FrameRender );
DXUTSetCallbackD3D11SwapChainReleasing( OnD3D11ReleasingSwapChain );
DXUTSetCallbackD3D11DeviceDestroyed( OnD3D11DestroyDevice );
// Build the dialogs before the device exists; OnD3D11ResizedSwapChain positions
// the controls later.
InitApp();
DXUTInit( true, true, NULL );
DXUTSetCursorSettings( true, true );
DXUTCreateWindow( L"Fast Texture Compressor" );
// Try to create a device with DX11 feature set
DXUTCreateDevice (D3D_FEATURE_LEVEL_11_0, true, 1280, 1024 );
// If we don't have an adequate driver, then we revert to DX10 feature set...
DXUTDeviceSettings settings = DXUTGetDeviceSettings();
if(settings.d3d11.DriverType == D3D_DRIVER_TYPE_UNKNOWN || settings.d3d11.DriverType == D3D_DRIVER_TYPE_NULL) {
DXUTCreateDevice(D3D_FEATURE_LEVEL_10_1, true, 1280, 1024);
// !HACK! Force enumeration here in order to relocate hardware with new feature level
DXUTGetD3D11Enumeration(true);
DXUTCreateDevice(D3D_FEATURE_LEVEL_10_1, true, 1280, 1024);
// g_DX11Available stays FALSE on this path, which disables the BC7 schemes.
const TCHAR *noDx11msg = _T("Your hardware does not seem to support DX11. BC7 Compression is disabled.");
MessageBox(NULL, noDx11msg, _T("Error"), MB_OK);
}
else {
g_DX11Available = TRUE;
}
// Now that we know what things are allowed, update the available options.
UpdateAllowedSettings();
DXUTMainLoop();
// Destroy all of the threads...
DestroyThreads();
return DXUTGetExitCode();
}
// Initialize the app
void InitApp()
{
// Initialize dialogs
gD3DSettingsDlg.Init(&gDialogResourceManager);
gHUD.Init(&gDialogResourceManager);
gSampleUI.Init(&gDialogResourceManager);
gHUD.SetCallback(OnGUIEvent);
int x = 0;
int y = 10;
gHUD.AddButton(IDC_TOGGLEFULLSCREEN, L"Toggle full screen", x, y, 170, 23);
gHUD.AddButton(IDC_TOGGLEREF, L"Toggle REF (F3)", x, y += 26, 170, 23, VK_F3);
gHUD.AddButton(IDC_CHANGEDEVICE, L"Change device (F2)", x, y += 26, 170, 23, VK_F2);
gSampleUI.SetCallback(OnGUIEvent);
x = 0;
y = 0;
gSampleUI.AddStatic(IDC_UNCOMPRESSEDTEXT, L"Uncompressed", x, y, 125, 22);
gSampleUI.AddStatic(IDC_COMPRESSEDTEXT, L"Compressed", x, y, 125, 22);
gSampleUI.AddStatic(IDC_ERRORTEXT, L"Error", x, y, 125, 22);
WCHAR wstr[MAX_PATH];
swprintf_s(wstr, MAX_PATH, L"Texture Size: %d x %d", gTexWidth, gTexHeight);
gSampleUI.AddStatic(IDC_SIZETEXT, wstr, x, y, 125, 22);
swprintf_s(wstr, MAX_PATH, L"%s: %.2f", kErrorStr, gError);
gSampleUI.AddStatic(IDC_RMSETEXT, wstr, x, y, 125, 22);
swprintf_s(wstr, MAX_PATH, L"Compression Time: %0.2f ms", gCompTime);
gSampleUI.AddStatic(IDC_TIMETEXT, wstr, x, y, 125, 22);
swprintf_s(wstr, MAX_PATH, L"Compression Rate: %0.2f Mp/s", gCompRate);
gSampleUI.AddStatic(IDC_RATETEXT, wstr, x, y, 125, 22);
gSampleUI.AddComboBox(IDC_TBB, x, y, 95, 22);
gSampleUI.AddComboBox(IDC_SIMD, x, y, 140, 22);
gSampleUI.AddComboBox(IDC_COMPRESSOR, x, y, 105, 22);
swprintf_s(wstr, MAX_PATH, L"Blocks Per Task: %d", gBlocksPerTask);
gSampleUI.AddStatic(IDC_BLOCKSPERTASKTEXT, wstr, x, y, 125, 22);
gSampleUI.AddSlider(IDC_BLOCKSPERTASK, x, y, 256, 22, 1, 512, gBlocksPerTask);
gSampleUI.AddButton(IDC_LOADTEXTURE, L"Load Texture", x, y, 125, 22);
gSampleUI.AddButton(IDC_RECOMPRESS, L"Recompress", x, y, 125, 22);
}
// Called right before creating a D3D11 device, allowing the app to modify the
// device settings as needed. The settings are never changed here; the only
// effect is a one-time warning when the very first device is a D3D11 REF device.
// Always returns true.
bool CALLBACK ModifyDeviceSettings( DXUTDeviceSettings* pDeviceSettings, void* pUserContext )
{
    // Uncomment this to get debug information from D3D11
    //pDeviceSettings->d3d11.CreateFlags |= D3D11_CREATE_DEVICE_DEBUG;

    // Only the first call is inspected.
    static bool s_isFirstCall = true;
    if( !s_isFirstCall )
    {
        return true;
    }
    s_isFirstCall = false;

    const bool isD3D11 = ( DXUT_D3D11_DEVICE == pDeviceSettings->ver );
    if( isD3D11 && pDeviceSettings->d3d11.DriverType == D3D_DRIVER_TYPE_REFERENCE )
    {
        DXUTDisplaySwitchingToREFWarning( pDeviceSettings->ver );
    }
    return true;
}
// Handle updates to the scene.
// Registered as the DXUT frame-move callback; intentionally empty because this
// sample has no per-frame scene state to advance.
void CALLBACK OnFrameMove( double fTime, float fElapsedTime, void* pUserContext )
{
}
// Render the help and statistics text
void RenderText()
{
UINT nBackBufferHeight = ( DXUTIsAppRenderingWithD3D9() ) ? DXUTGetD3D9BackBufferSurfaceDesc()->Height :
DXUTGetDXGIBackBufferSurfaceDesc()->Height;
gTxtHelper->Begin();
gTxtHelper->SetInsertionPos( 2, 0 );
gTxtHelper->SetForegroundColor( D3DXCOLOR( 1.0f, 1.0f, 0.0f, 1.0f ) );
gTxtHelper->DrawTextLine( DXUTGetFrameStats( false ) );
gTxtHelper->DrawTextLine( DXUTGetDeviceStats() );
// Draw help
if( gShowHelp )
{
gTxtHelper->SetInsertionPos( 2, nBackBufferHeight - 20 * 6 );
gTxtHelper->SetForegroundColor( D3DXCOLOR( 1.0f, 0.75f, 0.0f, 1.0f ) );
gTxtHelper->DrawTextLine( L"Controls:" );
gTxtHelper->SetInsertionPos( 20, nBackBufferHeight - 20 * 5 );
gTxtHelper->DrawTextLine( L"Hide help: F1\n"
L"Quit: ESC\n" );
}
else
{
gTxtHelper->SetForegroundColor( D3DXCOLOR( 1.0f, 1.0f, 1.0f, 1.0f ) );
gTxtHelper->DrawTextLine( L"Press F1 for help" );
}
gTxtHelper->End();
}
// Handle messages to the application.
// Offers the message to the dialog resource manager, the settings dialog (when
// it is active), the HUD and the sample UI, in that order, stopping at the
// first one that consumes it. *pbNoFurtherProcessing receives the result of the
// last handler consulted, except that the settings-dialog path leaves it
// untouched. Always returns 0.
LRESULT CALLBACK MsgProc( HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam, bool* pbNoFurtherProcessing,
                          void* pUserContext )
{
    // The resource manager goes first so shared GUI state is updated correctly.
    *pbNoFurtherProcessing = gDialogResourceManager.MsgProc( hWnd, uMsg, wParam, lParam );
    if( *pbNoFurtherProcessing )
    {
        return 0;
    }

    // While the settings dialog is showing it receives everything else.
    if( gD3DSettingsDlg.IsActive() )
    {
        gD3DSettingsDlg.MsgProc( hWnd, uMsg, wParam, lParam );
        return 0;
    }

    // The HUD gets the next chance; the sample UI only sees what the HUD ignored.
    *pbNoFurtherProcessing = gHUD.MsgProc( hWnd, uMsg, wParam, lParam );
    if( !*pbNoFurtherProcessing )
    {
        *pbNoFurtherProcessing = gSampleUI.MsgProc( hWnd, uMsg, wParam, lParam );
    }
    return 0;
}
// Handle key presses: pressing F1 toggles the help text. Key releases and all
// other keys are ignored.
void CALLBACK OnKeyboard( UINT nChar, bool bKeyDown, bool bAltDown, void* pUserContext )
{
    if( !bKeyDown )
    {
        return;
    }
    if( nChar == VK_F1 )
    {
        gShowHelp = !gShowHelp;
    }
}
// Handles the GUI events.
// Dispatches on the control id only; nEvent, pControl and pUserContext are not
// inspected. Control ids without a case are ignored.
//  - HUD buttons toggle full screen, the REF device, or the settings dialog.
//  - The TIMETEXT / RATETEXT / RMSETEXT cases re-format the matching static text
//    from the current global value.
//  - The three combo boxes select a new compression scheme, refresh the allowed
//    settings and recompress the texture.
//  - The slider updates gBlocksPerTask and recompresses.
//  - "Load Texture" opens a file dialog; "Recompress" recompresses.
void CALLBACK OnGUIEvent( UINT nEvent, int nControlID, CDXUTControl* pControl, void* pUserContext )
{
    switch( nControlID )
    {
    case IDC_TOGGLEFULLSCREEN:
    {
        DXUTToggleFullScreen();
        break;
    }
    case IDC_TOGGLEREF:
    {
        DXUTToggleREF();
        break;
    }
    case IDC_CHANGEDEVICE:
    {
        gD3DSettingsDlg.SetActive( !gD3DSettingsDlg.IsActive() );
        break;
    }
    case IDC_TIMETEXT:
    {
        WCHAR wstr[MAX_PATH];
        swprintf_s(wstr, MAX_PATH, L"Compression Time: %0.2f ms", gCompTime);
        gSampleUI.GetStatic(IDC_TIMETEXT)->SetText(wstr);
        break;
    }
    case IDC_RATETEXT:
    {
        WCHAR wstr[MAX_PATH];
        swprintf_s(wstr, MAX_PATH, L"Compression Rate: %0.2f Mp/s", gCompRate);
        gSampleUI.GetStatic(IDC_RATETEXT)->SetText(wstr);
        break;
    }
    case IDC_RMSETEXT:
    {
        WCHAR wstr[MAX_PATH];
        swprintf_s(wstr, MAX_PATH, L"%s: %.2f", kErrorStr, gError);
        gSampleUI.GetStatic(IDC_RMSETEXT)->SetText(wstr);
        break;
    }
    case IDC_TBB:
    {
        // Shut down all previous threading abilities.
        DestroyThreads();
        EInstructionSet instrSet = gCompressionScheme->instrSet;
        ECompressorType compType = gCompressionScheme->type;
        EThreadMode newMode = (EThreadMode)(INT_PTR)gSampleUI.GetComboBox(IDC_TBB)->GetSelectedData();
        switch(newMode)
        {
        case eThreadMode_TBB:
            // Initialize the TBB task manager.
            gTaskMgr.Init();
            break;
        case eThreadMode_Win32:
            InitWin32Threads();
            break;
        case eThreadMode_None:
            // Do nothing, our threads are fine.
            break;
        default:
            // Any other value: nothing to start.
            break;
        }
        SetCompressionScheme(instrSet, compType, newMode);
        UpdateAllowedSettings();
        // Recompress the texture.
        RecompressTexture();
        break;
    }
    case IDC_SIMD:
    {
        EThreadMode threadMode = gCompressionScheme->threadMode;
        ECompressorType compType = gCompressionScheme->type;
        EInstructionSet newInstrSet = (EInstructionSet)(INT_PTR)gSampleUI.GetComboBox(IDC_SIMD)->GetSelectedData();
        // If we selected AVX2, then the total number of blocks when using AVX2 changes, so we need
        // to reflect that in the slider.
        // NOTE(review): the slider is refreshed BEFORE the new scheme is installed. If
        // UpdateBlockSlider reads gCompressionScheme it sees the previous instruction
        // set here - confirm against its definition; the original order is kept.
        UpdateBlockSlider();
        SetCompressionScheme(newInstrSet, compType, threadMode);
        UpdateAllowedSettings();
        // Recompress the texture.
        RecompressTexture();
        break;
    }
    case IDC_COMPRESSOR:
    {
        EThreadMode threadMode = gCompressionScheme->threadMode;
        EInstructionSet instrSet = gCompressionScheme->instrSet;
        ECompressorType newCompType = (ECompressorType)(INT_PTR)gSampleUI.GetComboBox(IDC_COMPRESSOR)->GetSelectedData();
        SetCompressionScheme(instrSet, newCompType, threadMode);
        UpdateAllowedSettings();
        // Recompress the texture.
        RecompressTexture();
        break;
    }
    case IDC_BLOCKSPERTASK:
    {
        gBlocksPerTask = gSampleUI.GetSlider(IDC_BLOCKSPERTASK)->GetValue();
        WCHAR wstr[MAX_PATH];
        swprintf_s(wstr, MAX_PATH, L"Blocks Per Task: %d", gBlocksPerTask);
        gSampleUI.GetStatic(IDC_BLOCKSPERTASKTEXT)->SetText(wstr);
        // Recompress the texture.
        RecompressTexture();
        break;
    }
    case IDC_LOADTEXTURE:
    {
        // Store the current working directory. GetCurrentDirectory returns 0 on
        // failure and a value >= the buffer size when the buffer is too small; in
        // both cases the buffer does not hold a usable path and must not be restored.
        TCHAR workingDirectory[MAX_PATH];
        workingDirectory[0] = 0;
        const DWORD workingDirectoryLen = GetCurrentDirectory(MAX_PATH, workingDirectory);
        const bool haveWorkingDirectory = (workingDirectoryLen > 0 && workingDirectoryLen < MAX_PATH);
        // Open a file dialog.
        OPENFILENAME openFileName;
        WCHAR file[MAX_PATH];
        file[0] = 0;
        ZeroMemory(&openFileName, sizeof(OPENFILENAME));
        openFileName.lStructSize = sizeof(OPENFILENAME);
        openFileName.lpstrFile = file;
        openFileName.nMaxFile = MAX_PATH;
        openFileName.lpstrFilter = L"DDS\0*.dds\0\0";
        openFileName.nFilterIndex = 1;
        openFileName.lpstrInitialDir = NULL;
        openFileName.Flags = OFN_PATHMUSTEXIST | OFN_FILEMUSTEXIST;
        if(GetOpenFileName(&openFileName))
        {
            CreateTextures(openFileName.lpstrFile);
        }
        // Restore the working directory. GetOpenFileName changes the current working directory which causes problems with relative paths to assets.
        if(haveWorkingDirectory)
        {
            SetCurrentDirectory(workingDirectory);
        }
        break;
    }
    case IDC_RECOMPRESS:
    {
        // Recompress the texture.
        RecompressTexture();
        break;
    }
    default:
        // Controls without a handler.
        break;
    }
}
// Reject any D3D11 devices that aren't acceptable by returning false.
// This sample applies no filtering: every enumerated device is accepted.
bool CALLBACK IsD3D11DeviceAcceptable( const CD3D11EnumAdapterInfo *AdapterInfo, UINT Output, const CD3D11EnumDeviceInfo *DeviceInfo,
DXGI_FORMAT BackBufferFormat, bool bWindowed, void* pUserContext )
{
return true;
}
// Find and compile the specified shader.
//   szFileName    - shader file, resolved through the DXUT media search path
//   szEntryPoint  - entry point function name
//   szShaderModel - target profile, e.g. "vs_4_0"
//   ppBlobOut     - receives the compiled byte code; the caller releases it
// Returns S_OK on success. On failure returns the failing HRESULT and writes the
// compiler's error text, when there is any, to the debugger output.
HRESULT CompileShaderFromFile( WCHAR* szFileName, LPCSTR szEntryPoint, LPCSTR szShaderModel, ID3DBlob** ppBlobOut )
{
    HRESULT hr = S_OK;

    // find the file
    WCHAR str[MAX_PATH];
    V_RETURN( DXUTFindDXSDKMediaFileCch( str, MAX_PATH, szFileName ) );

    DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
#if defined( DEBUG ) || defined( _DEBUG )
    // Set the D3DCOMPILE_DEBUG flag to embed debug information in the shaders.
    // Setting this flag improves the shader debugging experience, but still allows
    // the shaders to be optimized and to run exactly the way they will run in
    // the release configuration of this program.
    dwShaderFlags |= D3DCOMPILE_DEBUG;
#endif

    // Start from NULL so the test and release below are well defined even when
    // the compiler call returns without writing the error-blob pointer.
    ID3DBlob* pErrorBlob = NULL;
    hr = D3DX11CompileFromFile( str, NULL, NULL, szEntryPoint, szShaderModel,
                                dwShaderFlags, 0, NULL, ppBlobOut, &pErrorBlob, NULL );
    if( FAILED(hr) && pErrorBlob != NULL )
    {
        OutputDebugStringA( (char*)pErrorBlob->GetBufferPointer() );
    }
    SAFE_RELEASE( pErrorBlob );

    // Any success code is reported as plain S_OK.
    return FAILED(hr) ? hr : S_OK;
}
// Create any D3D11 resources that aren't dependent on the back buffer
HRESULT CALLBACK OnD3D11CreateDevice( ID3D11Device* pd3dDevice, const DXGI_SURFACE_DESC* pBackBufferSurfaceDesc,
void* pUserContext )
{
HRESULT hr;
ID3D11DeviceContext* pd3dImmediateContext = DXUTGetD3D11DeviceContext();
V_RETURN(gDialogResourceManager.OnD3D11CreateDevice(pd3dDevice, pd3dImmediateContext));
V_RETURN(gD3DSettingsDlg.OnD3D11CreateDevice(pd3dDevice));
gTxtHelper = new CDXUTTextHelper(pd3dDevice, pd3dImmediateContext, &gDialogResourceManager, 15);
// Create a vertex shader.
ID3DBlob* vertexShaderBuffer = NULL;
V_RETURN(CompileShaderFromFile(L"FastTextureCompressor\\FastTextureCompressor.hlsl", "PassThroughVS", "vs_4_0", &vertexShaderBuffer));
V_RETURN(pd3dDevice->CreateVertexShader(vertexShaderBuffer->GetBufferPointer(), vertexShaderBuffer->GetBufferSize(), NULL, &gVertexShader));
// Create a pixel shader that renders the composite frame.
ID3DBlob* pixelShaderBuffer = NULL;
V_RETURN(CompileShaderFromFile(L"FastTextureCompressor\\FastTextureCompressor.hlsl", "RenderFramePS", "ps_4_0", &pixelShaderBuffer));
V_RETURN(pd3dDevice->CreatePixelShader(pixelShaderBuffer->GetBufferPointer(), pixelShaderBuffer->GetBufferSize(), NULL, &gRenderFramePS));
// Create a pixel shader that renders the error texture.
V_RETURN(CompileShaderFromFile(L"FastTextureCompressor\\FastTextureCompressor.hlsl", "RenderTexturePS", "ps_4_0", &pixelShaderBuffer));
V_RETURN(pd3dDevice->CreatePixelShader(pixelShaderBuffer->GetBufferPointer(), pixelShaderBuffer->GetBufferSize(), NULL, &gRenderTexturePS));
// Create our vertex input layout
const D3D11_INPUT_ELEMENT_DESC layout[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0 }
};
V_RETURN(pd3dDevice->CreateInputLayout(layout, ARRAYSIZE(layout), vertexShaderBuffer->GetBufferPointer(), vertexShaderBuffer->GetBufferSize(), &gVertexLayout));
SAFE_RELEASE(vertexShaderBuffer);
SAFE_RELEASE(pixelShaderBuffer);
// Create a vertex buffer for three textured quads.
D3DXVECTOR2 quadSize(0.32f, 0.32f);
D3DXVECTOR2 quadOrigin(-0.66f, -0.0f);
Vertex tripleQuadVertices[18];
ZeroMemory(tripleQuadVertices, sizeof(tripleQuadVertices));
for(int i = 0; i < 18; i += 6)
{
tripleQuadVertices[i].position = D3DXVECTOR3(quadOrigin.x - quadSize.x, quadOrigin.y + quadSize.y, 0.0f);
tripleQuadVertices[i].texCoord = D3DXVECTOR2(0.0f, 0.0f);
tripleQuadVertices[i + 1].position = D3DXVECTOR3(quadOrigin.x + quadSize.x, quadOrigin.y + quadSize.y, 0.0f);
tripleQuadVertices[i + 1].texCoord = D3DXVECTOR2(1.0f, 0.0f);
tripleQuadVertices[i + 2].position = D3DXVECTOR3(quadOrigin.x + quadSize.x, quadOrigin.y - quadSize.y, 0.0f);
tripleQuadVertices[i + 2].texCoord = D3DXVECTOR2(1.0f, 1.0f);
tripleQuadVertices[i + 3].position = D3DXVECTOR3(quadOrigin.x + quadSize.x, quadOrigin.y - quadSize.y, 0.0f);
tripleQuadVertices[i + 3].texCoord = D3DXVECTOR2(1.0f, 1.0f);
tripleQuadVertices[i + 4].position = D3DXVECTOR3(quadOrigin.x - quadSize.x, quadOrigin.y - quadSize.y, 0.0f);
tripleQuadVertices[i + 4].texCoord = D3DXVECTOR2(0.0f, 1.0f);
tripleQuadVertices[i + 5].position = D3DXVECTOR3(quadOrigin.x - quadSize.x, quadOrigin.y + quadSize.y, 0.0f);
tripleQuadVertices[i + 5].texCoord = D3DXVECTOR2(0.0f, 0.0f);
quadOrigin.x += 0.66f;
}
D3D11_BUFFER_DESC bufDesc;
ZeroMemory(&bufDesc, sizeof(bufDesc));
bufDesc.Usage = D3D11_USAGE_DEFAULT;
bufDesc.ByteWidth = sizeof(tripleQuadVertices);
bufDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
bufDesc.CPUAccessFlags = 0;
D3D11_SUBRESOURCE_DATA data;
ZeroMemory(&data, sizeof(data));
data.pSysMem = tripleQuadVertices;
V_RETURN(pd3dDevice->CreateBuffer(&bufDesc, &data, &gVertexBuffer));
// Create a vertex buffer for a single textured quad.
quadSize = D3DXVECTOR2(1.0f, 1.0f);
quadOrigin = D3DXVECTOR2(0.0f, 0.0f);
Vertex singleQuadVertices[6];
singleQuadVertices[0].position = D3DXVECTOR3(quadOrigin.x - quadSize.x, quadOrigin.y + quadSize.y, 0.0f);
singleQuadVertices[0].texCoord = D3DXVECTOR2(0.0f, 0.0f);
singleQuadVertices[1].position = D3DXVECTOR3(quadOrigin.x + quadSize.x, quadOrigin.y + quadSize.y, 0.0f);
singleQuadVertices[1].texCoord = D3DXVECTOR2(1.0f, 0.0f);
singleQuadVertices[2].position = D3DXVECTOR3(quadOrigin.x + quadSize.x, quadOrigin.y - quadSize.y, 0.0f);
singleQuadVertices[2].texCoord = D3DXVECTOR2(1.0f, 1.0f);
singleQuadVertices[3].position = D3DXVECTOR3(quadOrigin.x + quadSize.x, quadOrigin.y - quadSize.y, 0.0f);
singleQuadVertices[3].texCoord = D3DXVECTOR2(1.0f, 1.0f);
singleQuadVertices[4].position = D3DXVECTOR3(quadOrigin.x - quadSize.x, quadOrigin.y - quadSize.y, 0.0f);
singleQuadVertices[4].texCoord = D3DXVECTOR2(0.0f, 1.0f);
singleQuadVertices[5].position = D3DXVECTOR3(quadOrigin.x - quadSize.x, quadOrigin.y + quadSize.y, 0.0f);
singleQuadVertices[5].texCoord = D3DXVECTOR2(0.0f, 0.0f);
ZeroMemory(&bufDesc, sizeof(bufDesc));
bufDesc.Usage = D3D11_USAGE_DEFAULT;
bufDesc.ByteWidth = sizeof(singleQuadVertices);
bufDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
bufDesc.CPUAccessFlags = 0;
ZeroMemory(&data, sizeof(data));
data.pSysMem = singleQuadVertices;
V_RETURN(pd3dDevice->CreateBuffer(&bufDesc, &data, &gQuadVB));
// Create a sampler state
D3D11_SAMPLER_DESC SamDesc;
SamDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
SamDesc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP;
SamDesc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP;
SamDesc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
SamDesc.MipLODBias = 0.0f;
SamDesc.MaxAnisotropy = 1;
SamDesc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
SamDesc.BorderColor[0] = SamDesc.BorderColor[1] = SamDesc.BorderColor[2] = SamDesc.BorderColor[3] = 0;
SamDesc.MinLOD = 0;
SamDesc.MaxLOD = D3D11_FLOAT32_MAX;
V_RETURN(pd3dDevice->CreateSamplerState(&SamDesc, &gSamPoint));
// Load and initialize the textures.
WCHAR path[MAX_PATH];
V_RETURN(DXUTFindDXSDKMediaFileCch(path, MAX_PATH, L"Images\\texture.dds"));
V_RETURN(CreateTextures(path));
return S_OK;
}
// Create any D3D11 resources that depend on the back buffer, and lay out the UI
// for the new back buffer size: the HUD goes to the top-right corner, the sample
// UI covers the whole back buffer, the three captions sit a third of the height
// down, and the control panel is anchored to the bottom-right corner.
HRESULT CALLBACK OnD3D11ResizedSwapChain( ID3D11Device* pd3dDevice, IDXGISwapChain* pSwapChain,
                                          const DXGI_SURFACE_DESC* pBackBufferSurfaceDesc, void* pUserContext )
{
    HRESULT hr;
    V_RETURN( gDialogResourceManager.OnD3D11ResizedSwapChain( pd3dDevice, pBackBufferSurfaceDesc ) );
    V_RETURN( gD3DSettingsDlg.OnD3D11ResizedSwapChain( pd3dDevice, pBackBufferSurfaceDesc ) );

    // Dialog placement.
    gHUD.SetLocation( pBackBufferSurfaceDesc->Width - 170, 0 );
    gHUD.SetSize( 170, 170 );
    gSampleUI.SetLocation( 0, 0 );
    gSampleUI.SetSize( pBackBufferSurfaceDesc->Width, pBackBufferSurfaceDesc->Height );

    // Captions: one per third of the width, just above the one-third height line.
    const int columnWidth = int(gSampleUI.GetWidth() / 3.0f);
    const int captionY = int(gSampleUI.GetHeight() / 3.0f) - 20;
    const int captionX = 20;
    gSampleUI.GetStatic( IDC_UNCOMPRESSEDTEXT )->SetLocation( captionX, captionY );
    gSampleUI.GetStatic( IDC_COMPRESSEDTEXT )->SetLocation( captionX + columnWidth, captionY );
    gSampleUI.GetStatic( IDC_ERRORTEXT )->SetLocation( captionX + 2 * columnWidth, captionY );

    // Control panel: eight rows spaced 26 pixels apart.
    const int kRowStep = 26;
    const int panelX = gSampleUI.GetWidth() - 276;
    const int panelY = gSampleUI.GetHeight() - 216;

    gSampleUI.GetStatic( IDC_SIZETEXT )->SetLocation( panelX, panelY );
    gSampleUI.GetStatic( IDC_RMSETEXT )->SetLocation( panelX, panelY + 1 * kRowStep );
    gSampleUI.GetStatic( IDC_TIMETEXT )->SetLocation( panelX, panelY + 2 * kRowStep );
    gSampleUI.GetStatic( IDC_RATETEXT )->SetLocation( panelX, panelY + 3 * kRowStep );

    // Instruction set and compressor selectors share a row.
    gSampleUI.GetComboBox( IDC_SIMD )->SetLocation( panelX, panelY + 4 * kRowStep );
    gSampleUI.GetComboBox( IDC_COMPRESSOR )->SetLocation( panelX + 150, panelY + 4 * kRowStep );

    // Blocks-per-task label shares a row with the threading selector.
    gSampleUI.GetStatic( IDC_BLOCKSPERTASKTEXT )->SetLocation( panelX, panelY + 5 * kRowStep );
    gSampleUI.GetComboBox( IDC_TBB )->SetLocation( panelX + 160, panelY + 5 * kRowStep );

    gSampleUI.GetSlider( IDC_BLOCKSPERTASK )->SetLocation( panelX, panelY + 6 * kRowStep );

    // Action buttons share the last row.
    gSampleUI.GetButton( IDC_LOADTEXTURE )->SetLocation( panelX, panelY + 7 * kRowStep );
    gSampleUI.GetButton( IDC_RECOMPRESS )->SetLocation( panelX + 131, panelY + 7 * kRowStep );
    return S_OK;
}
// Render the scene using the D3D11 device.
// Draws the uncompressed, compressed and error textures side by side, then the
// HUD, the sample UI and the text overlay. While the settings dialog is active
// only that dialog is rendered.
void CALLBACK OnD3D11FrameRender( ID3D11Device* pd3dDevice, ID3D11DeviceContext* pd3dImmediateContext, double fTime,
                                  float fElapsedTime, void* pUserContext )
{
    // Recompress the texture gFrameDelay frames after the app has started. This produces more accurate timing of the
    // compression algorithm. The counter stops advancing once it has passed gFrameDelay.
    if( gFrameNum <= gFrameDelay )
    {
        if( gFrameNum == gFrameDelay )
        {
            RecompressTexture();
        }
        gFrameNum++;
    }

    // If the settings dialog is being shown, then render it instead of rendering the app's scene
    if( gD3DSettingsDlg.IsActive() )
    {
        gD3DSettingsDlg.OnRender( fElapsedTime );
        return;
    }

    // Clear the render target and depth stencil
    float ClearColor[4] = { 0.02f, 0.02f, 0.02f, 1.0f };
    pd3dImmediateContext->ClearRenderTargetView( DXUTGetD3D11RenderTargetView(), ClearColor );
    pd3dImmediateContext->ClearDepthStencilView( DXUTGetD3D11DepthStencilView(), D3D11_CLEAR_DEPTH, 1.0, 0 );

    // Input assembler: layout, the three-quad vertex buffer, triangle lists.
    const UINT vertexStride = sizeof( Vertex );
    const UINT vertexOffset = 0;
    pd3dImmediateContext->IASetInputLayout( gVertexLayout );
    pd3dImmediateContext->IASetVertexBuffers( 0, 1, &gVertexBuffer, &vertexStride, &vertexOffset );
    pd3dImmediateContext->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST );

    // Shaders and the texture sampler.
    pd3dImmediateContext->VSSetShader( gVertexShader, NULL, 0 );
    pd3dImmediateContext->PSSetShader( gRenderFramePS, NULL, 0 );
    pd3dImmediateContext->PSSetSamplers( 0, 1, &gSamPoint );

    // One quad (6 vertices) per texture, in buffer order:
    // uncompressed, compressed, error.
    ID3D11ShaderResourceView* const quadTextures[] = { gUncompressedSRV, gCompressedSRV, gErrorSRV };
    const UINT kVerticesPerQuad = 6;
    for( UINT quad = 0; quad < 3; ++quad )
    {
        pd3dImmediateContext->PSSetShaderResources( 0, 1, &quadTextures[quad] );
        pd3dImmediateContext->Draw( kVerticesPerQuad, quad * kVerticesPerQuad );
    }

    // UI overlay.
    DXUT_BeginPerfEvent( DXUT_PERFEVENTCOLOR, L"HUD / Stats" );
    HRESULT hr;
    V( gHUD.OnRender( fElapsedTime ) );
    V( gSampleUI.OnRender( fElapsedTime ) );
    RenderText();
    DXUT_EndPerfEvent();
}
// Release D3D11 resources created in OnD3D11ResizedSwapChain.
// Only the dialog resource manager is notified here.
void CALLBACK OnD3D11ReleasingSwapChain( void* pUserContext )
{
gDialogResourceManager.OnD3D11ReleasingSwapChain();
}
// Release D3D11 resources created in OnD3D11CreateDevice.
// Notifies the DXUT helpers first, then deletes the text helper, releases every
// device object held in a global, and finally drops the three texture views.
void CALLBACK OnD3D11DestroyDevice( void* pUserContext )
{
gDialogResourceManager.OnD3D11DestroyDevice();
gD3DSettingsDlg.OnD3D11DestroyDevice();
//CDXUTDirectionWidget::StaticOnD3D11DestroyDevice();
DXUTGetGlobalResourceCache().OnDestroyDevice();
SAFE_DELETE( gTxtHelper );
SAFE_RELEASE( gVertexLayout );
SAFE_RELEASE( gVertexBuffer );
SAFE_RELEASE( gQuadVB );
SAFE_RELEASE( gIndexBuffer );
SAFE_RELEASE( gVertexShader );
SAFE_RELEASE( gRenderFramePS );
SAFE_RELEASE( gRenderTexturePS );
SAFE_RELEASE( gSamPoint );
// Releases gErrorSRV, gCompressedSRV and gUncompressedSRV.
DestroyTextures();
}
// Free previously allocated texture resources and create new texture resources.
// Runs the whole pipeline for a new image: load `file` into gUncompressedSRV,
// compress it into gCompressedSRV, then render the difference into gErrorSRV.
// Returns S_OK on success; otherwise returns as soon as one of the three steps
// fails (via V_RETURN), leaving the views created by the earlier steps in place.
HRESULT CreateTextures(LPTSTR file)
{
// Destroy any previously created textures.
DestroyTextures();
// Load the uncompressed texture.
HRESULT hr;
V_RETURN(LoadTexture(file));
// Compress the texture.
V_RETURN(CompressTexture(gUncompressedSRV, &gCompressedSRV));
// Compute the error in the compressed texture.
V_RETURN(ComputeError(gUncompressedSRV, gCompressedSRV, &gErrorSRV));
return S_OK;
}
// Destroy texture resources.
// Releases the three global shader resource views (error, compressed,
// uncompressed) through SAFE_RELEASE.
void DestroyTextures()
{
SAFE_RELEASE(gErrorSRV);
SAFE_RELEASE(gCompressedSRV);
SAFE_RELEASE(gUncompressedSRV);
}
// Loads the image at `file` into gUncompressedSRV and prepares it for compression.
// The compressor only works on dimensions that are divisible by 4, so the loaded
// texture is handed to PadTexture, which resizes it and pads with the edge values
// when necessary. Afterwards the UI size label and the blocks-per-task slider are
// refreshed from the final (padded) dimensions.
// Returns S_OK on success, or the failing HRESULT from the load / pad step.
HRESULT LoadTexture(LPTSTR file)
{
    HRESULT hr;

    // Describe the load: a single mip level (no mip chain), sRGB RGBA8 texels,
    // point filtering, bindable as a shader resource. Everything else is left
    // at the D3DX default.
    D3DX11_IMAGE_LOAD_INFO loadInfo;
    ZeroMemory(&loadInfo, sizeof(loadInfo));
    loadInfo.Format         = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
    loadInfo.Filter         = D3DX11_FILTER_POINT | D3DX11_FILTER_SRGB;
    loadInfo.BindFlags      = D3D11_BIND_SHADER_RESOURCE;
    loadInfo.MipLevels      = 1;
    loadInfo.Width          = D3DX11_DEFAULT;
    loadInfo.Height         = D3DX11_DEFAULT;
    loadInfo.Depth          = D3DX11_DEFAULT;
    loadInfo.FirstMipLevel  = D3DX11_DEFAULT;
    loadInfo.Usage          = (D3D11_USAGE) D3DX11_DEFAULT;
    loadInfo.CpuAccessFlags = D3DX11_DEFAULT;
    loadInfo.MiscFlags      = D3DX11_DEFAULT;
    loadInfo.MipFilter      = D3DX11_DEFAULT;
    loadInfo.pSrcInfo       = NULL;

    // Load the uncompressed texture.
    V_RETURN(D3DX11CreateShaderResourceViewFromFile(DXUTGetD3D11Device(), file, &loadInfo, NULL, &gUncompressedSRV, NULL));

    // Pad the texture.
    V_RETURN(PadTexture(&gUncompressedSRV));

    // Read back the dimensions of whatever texture the view now refers to.
    ID3D11Texture2D* loadedTex;
    gUncompressedSRV->GetResource((ID3D11Resource**)&loadedTex);
    D3D11_TEXTURE2D_DESC loadedDesc;
    loadedTex->GetDesc(&loadedDesc);
    SAFE_RELEASE(loadedTex);

    gTexWidth  = loadedDesc.Width;
    gTexHeight = loadedDesc.Height;

    // Show the size in the UI.
    WCHAR sizeLabel[MAX_PATH];
    swprintf_s(sizeLabel, MAX_PATH, L"Texture Size: %d x %d", gTexWidth, gTexHeight);
    gSampleUI.GetStatic(IDC_SIZETEXT)->SetText(sizeLabel);

    // The valid blocks-per-task range depends on the texture size.
    UpdateBlockSlider();

    return S_OK;
}
// Points gCompressionScheme at the first entry of kCompressionSchemes whose
// instruction set, compressor type and thread mode all equal the arguments.
// When no entry matches, the current scheme is left untouched and an error
// line is written to the debugger output.
void SetCompressionScheme(EInstructionSet instrSet, ECompressorType compType, EThreadMode threadMode) {
    for(int idx = 0; idx < kNumCompressionSchemes; ++idx) {
        if(kCompressionSchemes[idx].instrSet != instrSet) {
            continue;
        }
        if(kCompressionSchemes[idx].type != compType) {
            continue;
        }
        if(kCompressionSchemes[idx].threadMode != threadMode) {
            continue;
        }

        // All three fields agree: select this scheme and stop searching.
        gCompressionScheme = &(kCompressionSchemes[idx]);
        return;
    }

    OutputDebugString(L"ERROR: Did not find match for compression scheme, not changing.\n");
}
void UpdateCompressionModes() {
CDXUTComboBox *comboBox = gSampleUI.GetComboBox(IDC_COMPRESSOR);
comboBox->RemoveAllItems();
// If we're updating the compression modes, then see
// what we currently have selected and keep everything else constant.
EThreadMode currThreadMode = gCompressionScheme->threadMode;
EInstructionSet currInstrSet = gCompressionScheme->instrSet;
bool added[kNumCompressorTypes];
memset(added, 0, sizeof(added));
for(int i = 0; i < kNumCompressionSchemes; i++) {
bool match = kCompressionSchemes[i].instrSet == currInstrSet;
match = match && kCompressionSchemes[i].threadMode == currThreadMode;
match = match && kCompressionSchemes[i].availabilityOverride;
if(match) {
ECompressorType compType = kCompressionSchemes[i].type;
if(!added[compType]) {
comboBox->AddItem(kCompressorTypeStr[compType], (void*)(INT_PTR)compType);
added[compType] = true;
}
}
}
comboBox->SetSelectedByData((void *)(INT_PTR)(gCompressionScheme->type));
}
void UpdateCompressionAlgorithms() {
CDXUTComboBox *comboBox = gSampleUI.GetComboBox(IDC_SIMD);
comboBox->RemoveAllItems();
// If we're updating the compression algorithms, then see
// what we currently have selected and keep everything else constant.
EThreadMode currThreadMode = gCompressionScheme->threadMode;
ECompressorType currType = gCompressionScheme->type;
bool added[kNumInstructionSets];
memset(added, 0, sizeof(added));
for(int i = 0; i < kNumCompressionSchemes; i++) {
bool match = kCompressionSchemes[i].type == currType;
match = match && kCompressionSchemes[i].threadMode == currThreadMode;
match = match && kCompressionSchemes[i].availabilityOverride;
if(match) {
EInstructionSet instrSet = kCompressionSchemes[i].instrSet;
if(!added[instrSet]) {
comboBox->AddItem(kInstructionSetStr[instrSet], (void*)(INT_PTR)instrSet);
added[instrSet] = true;
}
}
}
comboBox->SetSelectedByData((void *)(INT_PTR)(gCompressionScheme->instrSet));
}
void UpdateThreadingMode() {
CDXUTComboBox *comboBox = gSampleUI.GetComboBox(IDC_TBB);
comboBox->RemoveAllItems();
// If we're updating the compression algorithms, then see
// what we currently have selected and keep everything else constant.
EInstructionSet currInstrSet = gCompressionScheme->instrSet;
ECompressorType currType = gCompressionScheme->type;
bool added[kNumThreadModes];
memset(added, 0, sizeof(added));
for(int i = 0; i < kNumCompressionSchemes; i++) {
bool match = kCompressionSchemes[i].type == currType;
match = match && kCompressionSchemes[i].instrSet == currInstrSet;
match = match && kCompressionSchemes[i].availabilityOverride;
if(match) {
EThreadMode threadMode = kCompressionSchemes[i].threadMode;
if(!added[threadMode]) {
comboBox->AddItem(kThreadModeStr[threadMode], (void*)(INT_PTR)threadMode);
added[threadMode] = true;
}
}
}
comboBox->SetSelectedByData((void *)(INT_PTR)(gCompressionScheme->threadMode));
}
// Refreshes all three scheme combo boxes (compressor type, instruction set,
// threading mode) from the currently selected gCompressionScheme.
void UpdateAllowedSettings() {
UpdateCompressionModes();
UpdateCompressionAlgorithms();
UpdateThreadingMode();
}
// Sets the range of the IDC_BLOCKSPERTASK slider to [1, blocks per processor]
// for the current texture size and compression scheme.
//
// A block is 4x4 texels; for the AVX2 instruction set the column count is
// halved (GetBlocksPerLoop reports two blocks per loop for AVX2).
//
// The upper bound is clamped to at least 1 so that a small texture on a
// many-core machine does not produce an inverted range such as [1, 0], and a
// non-positive processor count is treated as 1 to avoid dividing by zero.
void UpdateBlockSlider() {
    const int blockRows = gTexHeight / 4;
    int blockCols = gTexWidth / 4;
    if(gCompressionScheme->instrSet == eInstrSet_AVX2) {
        blockCols /= 2;
    }

    const int numBlocks = blockRows * blockCols;
    const int numProcs = (gNumProcessors > 0) ? (int)gNumProcessors : 1;

    int blksPerProc = numBlocks / numProcs;
    if(blksPerProc < 1) {
        blksPerProc = 1;
    }

    gSampleUI.GetSlider(IDC_BLOCKSPERTASK)->SetRange(1, blksPerProc);
}
// Pad the texture to dimensions that are divisible by 4.
//
// textureSRV  in/out: view of the texture to pad. If either dimension is not a
//             multiple of 4, the view is replaced by a view of a new texture
//             whose width/height are rounded UP to the next multiple of 4; the
//             extra texels replicate the last column / last row. A dimension
//             that is already a multiple of 4 is left unchanged.
// Returns     S_OK when nothing had to be done or padding succeeded, E_POINTER
//             for a NULL argument, otherwise the failing HRESULT. On failure
//             *textureSRV is left untouched.
//
// The copy assumes 4 bytes per texel and reads only subresource 0
// (LoadTexture requests R8G8B8A8 with a single mip level).
// All temporaries are released on every exit path.
HRESULT PadTexture(ID3D11ShaderResourceView** textureSRV)
{
    if(textureSRV == NULL || *textureSRV == NULL)
    {
        return E_POINTER;
    }

    // Query the texture description.
    ID3D11Texture2D* tex = NULL;
    (*textureSRV)->GetResource((ID3D11Resource**)&tex);
    if(tex == NULL)
    {
        return E_POINTER;
    }
    D3D11_TEXTURE2D_DESC texDesc;
    tex->GetDesc(&texDesc);

    // Exit if the texture dimensions are divisible by 4.
    if((texDesc.Width % 4 == 0) && (texDesc.Height % 4 == 0))
    {
        SAFE_RELEASE(tex);
        return S_OK;
    }

    // Round each dimension up to the next multiple of 4.
    const UINT kBytesPerTexel = 4;
    const UINT padWidth = (texDesc.Width + 3) / 4 * 4;
    const UINT padHeight = (texDesc.Height + 3) / 4 * 4;
    const size_t padPitch = (size_t)padWidth * kBytesPerTexel;

    HRESULT hr = S_OK;
    ID3D11Device* device = DXUTGetD3D11Device();
    ID3D11DeviceContext* deviceContext = DXUTGetD3D11DeviceContext();
    ID3D11Texture2D* stgTex = NULL;
    ID3D11Texture2D* padTex = NULL;
    ID3D11ShaderResourceView* padSRV = NULL;

    // Create a buffer for the padded texels.
    BYTE* padTexels = new BYTE[padPitch * padHeight];

    // Create a CPU-accessible staging copy of the source texture.
    D3D11_TEXTURE2D_DESC stgTexDesc;
    memcpy(&stgTexDesc, &texDesc, sizeof(D3D11_TEXTURE2D_DESC));
    stgTexDesc.Usage = D3D11_USAGE_STAGING;
    stgTexDesc.BindFlags = 0;
    stgTexDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
    V(device->CreateTexture2D(&stgTexDesc, NULL, &stgTex));

    if(SUCCEEDED(hr))
    {
        deviceContext->CopyResource(stgTex, tex);

        // Map the staging resource.
        const UINT subresource = D3D11CalcSubresource(0, 0, 1);
        D3D11_MAPPED_SUBRESOURCE texData;
        V(deviceContext->Map(stgTex, subresource, D3D11_MAP_READ_WRITE, 0, &texData));

        if(SUCCEEDED(hr))
        {
            const BYTE* srcTexels = (const BYTE*)texData.pData;
            const size_t srcRowBytes = (size_t)texDesc.Width * kBytesPerTexel;

            for(UINT row = 0; row < texDesc.Height; row++)
            {
                // Source rows are RowPitch apart; destination rows are tightly packed.
                const BYTE* srcRow = srcTexels + (size_t)row * texData.RowPitch;
                BYTE* dstRow = padTexels + row * padPitch;
                memcpy(dstRow, srcRow, srcRowBytes);

                // Pad the end of the row with its last texel.
                const BYTE* edgeTexel = srcRow + srcRowBytes - kBytesPerTexel;
                for(UINT padCol = texDesc.Width; padCol < padWidth; padCol++)
                {
                    memcpy(dstRow + (size_t)padCol * kBytesPerTexel, edgeTexel, kBytesPerTexel);
                }
            }

            // Pad the bottom with copies of the last (already widened) row.
            const BYTE* lastRow = padTexels + (size_t)(texDesc.Height - 1) * padPitch;
            for(UINT padRow = texDesc.Height; padRow < padHeight; padRow++)
            {
                memcpy(padTexels + padRow * padPitch, lastRow, padPitch);
            }

            // Unmap the staging resource.
            deviceContext->Unmap(stgTex, subresource);
        }
    }

    // Create the padded texture from the CPU buffer.
    D3D11_TEXTURE2D_DESC padTexDesc;
    memcpy(&padTexDesc, &texDesc, sizeof(D3D11_TEXTURE2D_DESC));
    padTexDesc.Width = padWidth;
    padTexDesc.Height = padHeight;
    if(SUCCEEDED(hr))
    {
        D3D11_SUBRESOURCE_DATA padTexData;
        ZeroMemory(&padTexData, sizeof(D3D11_SUBRESOURCE_DATA));
        padTexData.pSysMem = padTexels;
        padTexData.SysMemPitch = padWidth * sizeof(BYTE) * 4;
        V(device->CreateTexture2D(&padTexDesc, &padTexData, &padTex));
    }

    // Create a shader resource view for the padded texture.
    if(SUCCEEDED(hr))
    {
        D3D11_SHADER_RESOURCE_VIEW_DESC padTexSRVDesc;
        ZeroMemory(&padTexSRVDesc, sizeof(padTexSRVDesc));
        padTexSRVDesc.Format = padTexDesc.Format;
        padTexSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
        padTexSRVDesc.Texture2D.MipLevels = padTexDesc.MipLevels;
        // NOTE(review): evaluates to 0 for the single-mip textures LoadTexture
        // produces; would need revisiting if mip chains are ever padded.
        padTexSRVDesc.Texture2D.MostDetailedMip = padTexDesc.MipLevels - 1;
        V(device->CreateShaderResourceView(padTex, &padTexSRVDesc, &padSRV));
    }

    // Only now that the replacement exists, swap it in for the caller's view.
    if(SUCCEEDED(hr))
    {
        SAFE_RELEASE(*textureSRV);
        *textureSRV = padSRV;
        padSRV = NULL;
    }

    // Release temporaries (runs on success and on failure).
    delete [] padTexels;
    SAFE_RELEASE(padSRV);
    SAFE_RELEASE(padTex);
    SAFE_RELEASE(stgTex);
    SAFE_RELEASE(tex);

    return SUCCEEDED(hr) ? S_OK : hr;
}
// Save a texture to a file.
//
// textureSRV  view of the texture to write; must not be NULL.
// file        destination path; the texture is written in DDS format.
// Returns     S_OK on success, E_POINTER if the view is NULL or has no
//             resource, otherwise the HRESULT from D3DX11SaveTextureToFile.
//
// The resource reference obtained from the view is released on every path,
// including when the save fails.
HRESULT SaveTexture(ID3D11ShaderResourceView* textureSRV, LPTSTR file)
{
    if(textureSRV == NULL)
    {
        return E_POINTER;
    }

    // Get the texture resource (GetResource adds a reference).
    ID3D11Resource* texRes = NULL;
    textureSRV->GetResource(&texRes);
    if(texRes == NULL)
    {
        return E_POINTER;
    }

    // Save the texture to a file. V() records/traces the result without
    // returning, so the release below always runs.
    HRESULT hr;
    V(D3DX11SaveTextureToFile(DXUTGetD3D11DeviceContext(), texRes, D3DX11_IFF_DDS, file));

    // Release the texture resource.
    SAFE_RELEASE(texRes);

    return SUCCEEDED(hr) ? S_OK : hr;
}
// Compress a texture.
HRESULT CompressTexture(ID3D11ShaderResourceView* uncompressedSRV, ID3D11ShaderResourceView** compressedSRV)
{
// Query the texture description of the uncompressed texture.
ID3D11Resource* uncompRes;
gUncompressedSRV->GetResource(&uncompRes);
D3D11_TEXTURE2D_DESC uncompTexDesc;
((ID3D11Texture2D*)uncompRes)->GetDesc(&uncompTexDesc);
// Create a 2D texture for the compressed texture.
HRESULT hr;
ID3D11Texture2D* compTex;
D3D11_TEXTURE2D_DESC compTexDesc;
memcpy(&compTexDesc, &uncompTexDesc, sizeof(D3D11_TEXTURE2D_DESC));
switch(gCompressionScheme->type) {
default:
case eCompType_DXT1:
compTexDesc.Format = DXGI_FORMAT_BC1_UNORM_SRGB;
break;
case eCompType_DXT5:
compTexDesc.Format = DXGI_FORMAT_BC3_UNORM_SRGB;
break;
case eCompType_BC7:
compTexDesc.Format = DXGI_FORMAT_BC7_UNORM_SRGB;
break;
}
ID3D11Device* device = DXUTGetD3D11Device();
V_RETURN(device->CreateTexture2D(&compTexDesc, NULL, &compTex));
// Create a shader resource view for the compressed texture.
SAFE_RELEASE(*compressedSRV);
D3D11_SHADER_RESOURCE_VIEW_DESC compSRVDesc;
compSRVDesc.Format = compTexDesc.Format;
compSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
compSRVDesc.Texture2D.MipLevels = compTexDesc.MipLevels;
compSRVDesc.Texture2D.MostDetailedMip = compTexDesc.MipLevels - 1;
V_RETURN(device->CreateShaderResourceView(compTex, &compSRVDesc, compressedSRV));
// Create a staging resource for the compressed texture.
compTexDesc.Usage = D3D11_USAGE_STAGING;
compTexDesc.BindFlags = 0;
compTexDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
ID3D11Texture2D* compStgTex;
V_RETURN(device->CreateTexture2D(&compTexDesc, NULL, &compStgTex));
// Create a staging resource for the uncompressed texture.
uncompTexDesc.Usage = D3D11_USAGE_STAGING;
uncompTexDesc.BindFlags = 0;
uncompTexDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
ID3D11Texture2D* uncompStgTex;
V_RETURN(device->CreateTexture2D(&uncompTexDesc, NULL, &uncompStgTex));
// Copy the uncompressed texture into the staging resource.
ID3D11DeviceContext* deviceContext = DXUTGetD3D11DeviceContext();
deviceContext->CopyResource(uncompStgTex, uncompRes);
// Map the staging resources.
D3D11_MAPPED_SUBRESOURCE uncompData;
V_RETURN(deviceContext->Map(uncompStgTex, D3D11CalcSubresource(0, 0, 1), D3D11_MAP_READ_WRITE, 0, &uncompData));
D3D11_MAPPED_SUBRESOURCE compData;
V_RETURN(deviceContext->Map(compStgTex, D3D11CalcSubresource(0, 0, 1), D3D11_MAP_READ_WRITE, 0, &compData));
// Time the compression.
StopWatch stopWatch;
stopWatch.Start();
const int kNumCompressions = 1;
for(int cmpNum = 0; cmpNum < kNumCompressions; cmpNum++) {
// Compress the uncompressed texels.
DXTC::CompressImageDXT((BYTE*)uncompData.pData, (BYTE*)compData.pData, uncompTexDesc.Width, uncompTexDesc.Height);
}
// Update the compression time.
stopWatch.Stop();
gCompTime = stopWatch.TimeInMilliseconds();
gSampleUI.SendEvent(IDC_TIMETEXT, true, gSampleUI.GetStatic(IDC_TIMETEXT));
// Compute the compression rate.
INT numPixels = compTexDesc.Width * compTexDesc.Height * kNumCompressions;
gCompRate = (double)numPixels / stopWatch.TimeInSeconds() / 1000000.0;
gSampleUI.SendEvent(IDC_RATETEXT, true, gSampleUI.GetStatic(IDC_RATETEXT));
stopWatch.Reset();
// Unmap the staging resources.
deviceContext->Unmap(compStgTex, D3D11CalcSubresource(0, 0, 1));
deviceContext->Unmap(uncompStgTex, D3D11CalcSubresource(0, 0, 1));
// Copy the staging resourse into the compressed texture.
deviceContext->CopyResource(compTex, compStgTex);
// Release resources.
SAFE_RELEASE(uncompStgTex);
SAFE_RELEASE(compStgTex);
SAFE_RELEASE(compTex);
SAFE_RELEASE(uncompRes);
return S_OK;
}
// Evaluates the expression `x` exactly once and, if its value compares equal to
// NULL (a NULL handle or a FALSE/zero result), formats a message containing the
// stringified expression and the current line number and passes it to
// ReportWinThreadError. Execution continues after the report; the macro does
// not return or abort. The file name in the message is hard-coded as main.cpp.
// Wrapped in do { } while(0) so it behaves as a single statement.
#define CHECK_WIN_THREAD_FUNC(x) \
do { \
if(NULL == (x)) { \
wchar_t wstr[256]; \
swprintf_s(wstr, L"Error detected from call %s at line %d of main.cpp", _T(#x), __LINE__); \
ReportWinThreadError(wstr); \
} \
} \
while(0)
void ReportWinThreadError(const wchar_t *str) {
// Retrieve the system error message for the last-error code.
LPVOID lpMsgBuf;
LPVOID lpDisplayBuf;
DWORD dw = GetLastError();
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
dw,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR) &lpMsgBuf,
0, NULL );
// Display the error message.
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
(lstrlen((LPCTSTR) lpMsgBuf) + lstrlen((LPCTSTR)str) + 40) * sizeof(TCHAR));
StringCchPrintf((LPTSTR)lpDisplayBuf,
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
TEXT("%s failed with error %d: %s"),
str, dw, lpMsgBuf);
MessageBox(NULL, (LPCTSTR) lpDisplayBuf, TEXT("Error"), MB_OK);
// Free error-handling buffer allocations.
LocalFree(lpMsgBuf);
LocalFree(lpDisplayBuf);
}
void InitWin32Threads() {
// Already initialized?
if(gNumWinThreads > 0) {
return;
}
SetLastError(0);
gNumWinThreads = gNumProcessors;
if(gNumWinThreads >= MAXIMUM_WAIT_OBJECTS)
gNumWinThreads = MAXIMUM_WAIT_OBJECTS;
// Create the synchronization events.
for(int i = 0; i < gNumWinThreads; i++) {
CHECK_WIN_THREAD_FUNC(gWinThreadWorkEvent[i] = CreateEvent(NULL, FALSE, FALSE, NULL));
}
CHECK_WIN_THREAD_FUNC(gWinThreadStartEvent = CreateEvent(NULL, TRUE, FALSE, NULL));
CHECK_WIN_THREAD_FUNC(gWinThreadDoneEvent = CreateEvent(NULL, TRUE, FALSE, NULL));
// Create threads
for(int threadIdx = 0; threadIdx < gNumWinThreads; threadIdx++) {
gWinThreadData[threadIdx].state = eThreadState_WaitForData;
CHECK_WIN_THREAD_FUNC(hThreadArray[threadIdx] = CreateThread(NULL, 0, DXTC::CompressImageDXTWinThread, &gWinThreadData[threadIdx], 0, &dwThreadIdArray[threadIdx]));
}
}
// Tears down whatever threading backend the current compression scheme uses.
//   TBB:   shuts down the TBB task manager.
//   Win32: marks every worker as done, signals the start event so the workers
//          wake up and exit, waits for all thread handles, then closes every
//          thread and event handle and resets gNumWinThreads to 0.
//   None:  nothing to do.
// Win32 API failures are reported via CHECK_WIN_THREAD_FUNC /
// ReportWinThreadError and do not abort the teardown.
// NOTE(review): the Win32 branch does not check whether the threads were ever
// created (gNumWinThreads == 0) -- confirm callers only reach it after
// InitWin32Threads.
void DestroyThreads() {
switch(gCompressionScheme->threadMode) {
case eThreadMode_TBB:
{
// Shutdown the TBB task manager.
gTaskMgr.Shutdown();
}
break;
case eThreadMode_Win32:
{
// Release all windows threads that may be active...
for(int i=0; i < gNumWinThreads; i++) {
gWinThreadData[i].state = eThreadState_Done;
}
// Send the event for the threads to start.
CHECK_WIN_THREAD_FUNC(ResetEvent(gWinThreadDoneEvent));
CHECK_WIN_THREAD_FUNC(SetEvent(gWinThreadStartEvent));
// Wait for all the threads to finish....
DWORD dwWaitRet = WaitForMultipleObjects(gNumWinThreads, hThreadArray, TRUE, INFINITE);
if(WAIT_FAILED == dwWaitRet)
ReportWinThreadError(L"DestroyThreads() -- WaitForMultipleObjects");
// !HACK! This doesn't actually do anything. There is either a bug in the
// Intel compiler or the windows run-time that causes the threads to not
// be cleaned up properly if the following two lines of code are not present.
// Since we're passing INFINITE to WaitForMultipleObjects, that function will
// never time out and per-microsoft spec, should never give this return value...
// Even with these lines, the bug does not consistently disappear unless you
// clean and rebuild. Heisenbug?
//
// If we compile with MSVC, then the following two lines are not necessary.
else if(WAIT_TIMEOUT == dwWaitRet)
OutputDebugString(L"DestroyThreads() -- WaitForMultipleObjects -- TIMEOUT");
// Reset the start event
CHECK_WIN_THREAD_FUNC(ResetEvent(gWinThreadStartEvent));
CHECK_WIN_THREAD_FUNC(SetEvent(gWinThreadDoneEvent));
// Close all thread handles.
for(int i=0; i < gNumWinThreads; i++) {
CHECK_WIN_THREAD_FUNC(CloseHandle(hThreadArray[i]));
}
// Clear the whole array, not just the entries that were in use.
for(int i =0; i < kMaxWinThreads; i++ ){
hThreadArray[i] = NULL;
}
// Close all event handles...
CHECK_WIN_THREAD_FUNC(CloseHandle(gWinThreadDoneEvent));
gWinThreadDoneEvent = NULL;
CHECK_WIN_THREAD_FUNC(CloseHandle(gWinThreadStartEvent));
gWinThreadStartEvent = NULL;
for(int i = 0; i < gNumWinThreads; i++) {
CHECK_WIN_THREAD_FUNC(CloseHandle(gWinThreadWorkEvent[i]));
}
for(int i = 0; i < kMaxWinThreads; i++) {
gWinThreadWorkEvent[i] = NULL;
}
// Zero threads means "not initialized" to InitWin32Threads.
gNumWinThreads = 0;
}
break;
case eThreadMode_None:
// Do nothing.
break;
}
}
// Maps one of the four sRGB formats this application uses (BC1, BC3, BC7,
// R8G8B8A8) to its linear UNORM counterpart. Any other input trips an assert
// and, when asserts are compiled out, yields DXGI_FORMAT_R8G8B8A8_UNORM.
static inline DXGI_FORMAT GetNonSRGBFormat(DXGI_FORMAT f) {
    if(f == DXGI_FORMAT_BC1_UNORM_SRGB) {
        return DXGI_FORMAT_BC1_UNORM;
    }
    if(f == DXGI_FORMAT_BC3_UNORM_SRGB) {
        return DXGI_FORMAT_BC3_UNORM;
    }
    if(f == DXGI_FORMAT_BC7_UNORM_SRGB) {
        return DXGI_FORMAT_BC7_UNORM;
    }
    if(f == DXGI_FORMAT_R8G8B8A8_UNORM_SRGB) {
        return DXGI_FORMAT_R8G8B8A8_UNORM;
    }

    assert(!"Unknown format!");
    return DXGI_FORMAT_R8G8B8A8_UNORM;
}
// Compute an "error" texture that represents the absolute difference in color between an
// uncompressed texture and a compressed texture.
HRESULT ComputeError(ID3D11ShaderResourceView* uncompressedSRV, ID3D11ShaderResourceView* compressedSRV, ID3D11ShaderResourceView** errorSRV)
{
HRESULT hr;
// Query the texture description of the uncompressed texture.
ID3D11Resource* uncompRes;
gUncompressedSRV->GetResource(&uncompRes);
D3D11_TEXTURE2D_DESC uncompTexDesc;
((ID3D11Texture2D*)uncompRes)->GetDesc(&uncompTexDesc);
// Query the texture description of the uncompressed texture.
ID3D11Resource* compRes;
gCompressedSRV->GetResource(&compRes);
D3D11_TEXTURE2D_DESC compTexDesc;
((ID3D11Texture2D*)compRes)->GetDesc(&compTexDesc);
// Create a 2D resource without gamma correction for the two textures.
compTexDesc.Format = GetNonSRGBFormat(compTexDesc.Format);
uncompTexDesc.Format = GetNonSRGBFormat(uncompTexDesc.Format);
ID3D11Device* device = DXUTGetD3D11Device();
ID3D11Texture2D* uncompTex;
device->CreateTexture2D(&uncompTexDesc, NULL, &uncompTex);
ID3D11Texture2D* compTex;
device->CreateTexture2D(&compTexDesc, NULL, &compTex);
// Create a shader resource view for the two textures.
D3D11_SHADER_RESOURCE_VIEW_DESC compSRVDesc;
compSRVDesc.Format = compTexDesc.Format;
compSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
compSRVDesc.Texture2D.MipLevels = compTexDesc.MipLevels;
compSRVDesc.Texture2D.MostDetailedMip = compTexDesc.MipLevels - 1;
ID3D11ShaderResourceView *compSRV;
V_RETURN(device->CreateShaderResourceView(compTex, &compSRVDesc, &compSRV));
D3D11_SHADER_RESOURCE_VIEW_DESC uncompSRVDesc;
uncompSRVDesc.Format = uncompTexDesc.Format;
uncompSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
uncompSRVDesc.Texture2D.MipLevels = uncompTexDesc.MipLevels;
uncompSRVDesc.Texture2D.MostDetailedMip = uncompTexDesc.MipLevels - 1;
ID3D11ShaderResourceView *uncompSRV;
V_RETURN(device->CreateShaderResourceView(uncompTex, &uncompSRVDesc, &uncompSRV));
// Create a 2D texture for the error texture.
ID3D11Texture2D* errorTex;
D3D11_TEXTURE2D_DESC errorTexDesc;
memcpy(&errorTexDesc, &uncompTexDesc, sizeof(D3D11_TEXTURE2D_DESC));
errorTexDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
V_RETURN(device->CreateTexture2D(&errorTexDesc, NULL, &errorTex));
// Create a render target view for the error texture.
D3D11_RENDER_TARGET_VIEW_DESC errorRTVDesc;
errorRTVDesc.Format = errorTexDesc.Format;
errorRTVDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
errorRTVDesc.Texture2D.MipSlice = 0;
ID3D11RenderTargetView* errorRTV;
V_RETURN(device->CreateRenderTargetView(errorTex, &errorRTVDesc, &errorRTV));
// Create a shader resource view for the error texture.
D3D11_SHADER_RESOURCE_VIEW_DESC errorSRVDesc;
errorSRVDesc.Format = errorTexDesc.Format;
errorSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
errorSRVDesc.Texture2D.MipLevels = errorTexDesc.MipLevels;
errorSRVDesc.Texture2D.MostDetailedMip = errorTexDesc.MipLevels - 1;
V_RETURN(device->CreateShaderResourceView(errorTex, &errorSRVDesc, errorSRV));
// Create a query for the GPU operations...
D3D11_QUERY_DESC GPUQueryDesc;
GPUQueryDesc.Query = D3D11_QUERY_EVENT;
GPUQueryDesc.MiscFlags = 0;
#ifdef _DEBUG
D3D11_QUERY_DESC OcclusionQueryDesc;
OcclusionQueryDesc.Query = D3D11_QUERY_OCCLUSION;
OcclusionQueryDesc.MiscFlags = 0;
D3D11_QUERY_DESC StatsQueryDesc;
StatsQueryDesc.Query = D3D11_QUERY_PIPELINE_STATISTICS;
StatsQueryDesc.MiscFlags = 0;
#endif
ID3D11Query *GPUQuery;
V_RETURN(device->CreateQuery(&GPUQueryDesc, &GPUQuery));
ID3D11DeviceContext* deviceContext = DXUTGetD3D11DeviceContext();
deviceContext->CopyResource(compTex, compRes);
deviceContext->CopyResource(uncompTex, uncompRes);
#ifdef _DEBUG
ID3D11Query *OcclusionQuery, *StatsQuery;
V_RETURN(device->CreateQuery(&OcclusionQueryDesc, &OcclusionQuery));
V_RETURN(device->CreateQuery(&StatsQueryDesc, &StatsQuery));
deviceContext->Begin(OcclusionQuery);
deviceContext->Begin(StatsQuery);
#endif
// Set the viewport to a 1:1 mapping of pixels to texels.
D3D11_VIEWPORT viewport;
viewport.Width = (FLOAT)errorTexDesc.Width;
viewport.Height = (FLOAT)errorTexDesc.Height;
viewport.MinDepth = 0;
viewport.MaxDepth = 1;
viewport.TopLeftX = 0;
viewport.TopLeftY = 0;
deviceContext->RSSetViewports(1, &viewport);
// Bind the render target view of the error texture.
ID3D11RenderTargetView* RTV[1] = { errorRTV };
deviceContext->OMSetRenderTargets(1, RTV, NULL);
// Clear the render target.
FLOAT color[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
deviceContext->ClearRenderTargetView(errorRTV, color);
// Set the input layout.
deviceContext->IASetInputLayout(gVertexLayout);
// Set vertex buffer
UINT stride = sizeof(Vertex);
UINT offset = 0;
deviceContext->IASetVertexBuffers(0, 1, &gQuadVB, &stride, &offset);
// Set the primitive topology
deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
// Set the shaders
deviceContext->VSSetShader(gVertexShader, NULL, 0);
deviceContext->PSSetShader(gRenderTexturePS, NULL, 0);
// Set the texture sampler.
deviceContext->PSSetSamplers(0, 1, &gSamPoint);
// Bind the textures.
ID3D11ShaderResourceView* SRV[2] = { compSRV, uncompSRV};
deviceContext->PSSetShaderResources(0, 2, SRV);
// Store the depth/stencil state.
StoreDepthStencilState();
// Disable depth testing.
V_RETURN(DisableDepthTest());
// Render a quad.
deviceContext->Draw(6, 0);
// Restore the depth/stencil state.
RestoreDepthStencilState();
// Reset the render target.
RTV[0] = DXUTGetD3D11RenderTargetView();
deviceContext->OMSetRenderTargets(1, RTV, DXUTGetD3D11DepthStencilView());
// Reset the viewport.
viewport.Width = (FLOAT)DXUTGetDXGIBackBufferSurfaceDesc()->Width;
viewport.Height = (FLOAT)DXUTGetDXGIBackBufferSurfaceDesc()->Height;
deviceContext->RSSetViewports(1, &viewport);
deviceContext->End(GPUQuery);
#ifdef _DEBUG
deviceContext->End(OcclusionQuery);
deviceContext->End(StatsQuery);
#endif
BOOL finishedGPU = false;
// If we do not have a d3d 11 context, we will still hit this line and try to
// finish using the GPU. If this happens this enters an infinite loop.
int infLoopPrevention = 0;
while(!finishedGPU && ++infLoopPrevention < 10000) {
HRESULT ret;
V_RETURN(ret = deviceContext->GetData(GPUQuery, &finishedGPU, sizeof(BOOL), 0));
if(ret != S_OK)
Sleep(1);
}
#ifdef _DEBUG
UINT64 nPixelsWritten = 0;
deviceContext->GetData(OcclusionQuery, (void *)&nPixelsWritten, sizeof(UINT64), 0);
D3D11_QUERY_DATA_PIPELINE_STATISTICS stats;
deviceContext->GetData(StatsQuery, (void *)&stats, sizeof(D3D11_QUERY_DATA_PIPELINE_STATISTICS), 0);
TCHAR nPixelsWrittenMsg[256];
_stprintf(nPixelsWrittenMsg, _T("Pixels rendered during error computation: %d\n"), nPixelsWritten);
OutputDebugString(nPixelsWrittenMsg);
#endif
// Create a copy of the error texture that is accessible by the CPU
ID3D11Texture2D* errorTexCopy;
D3D11_TEXTURE2D_DESC errorTexCopyDesc;
memcpy(&errorTexCopyDesc, &uncompTexDesc, sizeof(D3D11_TEXTURE2D_DESC));
errorTexCopyDesc.Usage = D3D11_USAGE_STAGING;
errorTexCopyDesc.BindFlags = 0;
errorTexCopyDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
V_RETURN(device->CreateTexture2D(&errorTexCopyDesc, NULL, &errorTexCopy));
// Copy the error texture into the copy....
deviceContext->CopyResource(errorTexCopy, errorTex);
// Map the staging resource.
D3D11_MAPPED_SUBRESOURCE errorData;
V_RETURN(deviceContext->Map(errorTexCopy, D3D11CalcSubresource(0, 0, 1), D3D11_MAP_READ, 0, &errorData));
// Calculate PSNR
ComputeRMSE((const BYTE *)(errorData.pData), errorTexCopyDesc.Width, errorTexCopyDesc.Height);
gSampleUI.SendEvent(IDC_RMSETEXT, true, gSampleUI.GetStatic(IDC_RMSETEXT));
// Unmap the staging resources.
deviceContext->Unmap(errorTexCopy, D3D11CalcSubresource(0, 0, 1));
// Release resources.
SAFE_RELEASE(errorRTV);
SAFE_RELEASE(errorTex);
SAFE_RELEASE(errorTexCopy);
SAFE_RELEASE(uncompRes);
SAFE_RELEASE(compRes);
SAFE_RELEASE(GPUQuery);
#ifdef _DEBUG
SAFE_RELEASE(OcclusionQuery);
SAFE_RELEASE(StatsQuery);
#endif
SAFE_RELEASE(compSRV);
SAFE_RELEASE(uncompSRV);
SAFE_RELEASE(compTex);
SAFE_RELEASE(uncompTex);
return S_OK;
}
// Recompresses the already loaded texture and recomputes the error.
// gUncompressedSRV is kept; only the compressed and error views are released
// and rebuilt. Returns S_OK on success, otherwise returns as soon as either
// step fails (via V_RETURN).
HRESULT RecompressTexture()
{
// Destroy any previously created textures.
SAFE_RELEASE(gErrorSRV);
SAFE_RELEASE(gCompressedSRV);
// Compress the texture.
HRESULT hr;
V_RETURN(CompressTexture(gUncompressedSRV, &gCompressedSRV));
// Compute the error in the compressed texture.
V_RETURN(ComputeError(gUncompressedSRV, gCompressedSRV, &gErrorSRV));
return S_OK;
}
// Store the depth-stencil state.
void StoreDepthStencilState()
{
DXUTGetD3D11DeviceContext()->OMGetDepthStencilState(&gDepthStencilState, &gStencilReference);
}
// Restore the depth-stencil state.
// Re-binds the state and stencil reference value held in gDepthStencilState /
// gStencilReference (as saved by StoreDepthStencilState).
void RestoreDepthStencilState()
{
DXUTGetD3D11DeviceContext()->OMSetDepthStencilState(gDepthStencilState, gStencilReference);
}
// Disable depth testing.
// Creates a depth-stencil state with both the depth test and the stencil test
// switched off and binds it with a stencil reference of 0. The local reference
// to the state object is dropped again after binding.
// Returns S_OK, or the failing HRESULT from CreateDepthStencilState.
HRESULT DisableDepthTest()
{
    // Front and back faces use the same pass-through stencil operations.
    D3D11_DEPTH_STENCILOP_DESC keepAlways;
    keepAlways.StencilFailOp      = D3D11_STENCIL_OP_KEEP;
    keepAlways.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;
    keepAlways.StencilPassOp      = D3D11_STENCIL_OP_KEEP;
    keepAlways.StencilFunc        = D3D11_COMPARISON_ALWAYS;

    D3D11_DEPTH_STENCIL_DESC depStenDesc;
    ZeroMemory(&depStenDesc, sizeof(depStenDesc));

    // Depth half: test disabled.
    depStenDesc.DepthEnable    = FALSE;
    depStenDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
    depStenDesc.DepthFunc      = D3D11_COMPARISON_LESS;

    // Stencil half: test disabled, default masks.
    depStenDesc.StencilEnable    = FALSE;
    depStenDesc.StencilReadMask  = D3D11_DEFAULT_STENCIL_READ_MASK;
    depStenDesc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK;
    depStenDesc.FrontFace        = keepAlways;
    depStenDesc.BackFace         = keepAlways;

    HRESULT hr;
    ID3D11DepthStencilState* depStenState;
    V_RETURN(DXUTGetD3D11Device()->CreateDepthStencilState(&depStenDesc, &depStenState));

    DXUTGetD3D11DeviceContext()->OMSetDepthStencilState(depStenState, 0);
    SAFE_RELEASE(depStenState);

    return S_OK;
}
void ComputeRMSE(const BYTE *errorData, const INT width, const INT height) {
const float *w = BC7C::GetErrorMetric();
const double wr = w[0];
const double wg = w[1];
const double wb = w[2];
double MSE = 0.0;
for(int i = 0; i < width; i++) {
for(int j = 0; j < height; j++) {
const INT pixel = ((const INT *)errorData)[j * width + i];
double dr = double(pixel & 0xFF) * wr;
double dg = double((pixel >> 8) & 0xFF) * wg;
double db = double((pixel >> 16) & 0xFF) * wb;
const double pixelMSE = (double(dr) * double(dr)) + (double(dg) * double(dg)) + (double(db) * double(db));
MSE += pixelMSE;
}
}
MSE /= (double(width) * double(height));
#ifdef REPORT_RMSE
gError = sqrt(MSE);
#else
double MAXI = (255.0 * wr) * (255.0 * wr) + (255.0 * wg) * (255.0 * wg) + (255.0 * wb) * (255.0 * wb);
gError= 10 * log10(MAXI/MSE);
#endif
}
namespace DXTC
{
// Compresses an image using the threading backend of the current scheme.
// On a machine with at most one processor, or when the scheme's thread mode is
// eThreadMode_None, the work runs on the calling thread; otherwise it is handed
// to the TBB or Win32 implementation. An unrecognised thread mode does nothing.
//
// inBuf / outBuf  source texels and destination for the compressed blocks.
// width / height  image dimensions in pixels.
VOID CompressImageDXT(const BYTE* inBuf, BYTE* outBuf, INT width, INT height) {
    // If we aren't multi-cored, then just run everything serially.
    const bool singleCore = (gNumProcessors <= 1);
    if(singleCore || gCompressionScheme->threadMode == eThreadMode_None) {
        CompressImageDXTNoThread(inBuf, outBuf, width, height);
        return;
    }

    if(gCompressionScheme->threadMode == eThreadMode_TBB) {
        CompressImageDXTTBB(inBuf, outBuf, width, height);
    }
    else if(gCompressionScheme->threadMode == eThreadMode_Win32) {
        CompressImageDXTWIN(inBuf, outBuf, width, height);
    }
}
// Returns the single-threaded compression routine for the current scheme's
// (instruction set, compressor type) pair, or NULL when that pair has no
// implementation. The AVX2 routines are only considered when ENABLE_AVX2 is
// defined, and there is no AVX2 routine for BC7.
CompressionFunc GetCompressionFunc() {
    const EInstructionSet isa = gCompressionScheme->instrSet;
    const ECompressorType kind = gCompressionScheme->type;

    if(isa == eInstrSet_SSE) {
        if(kind == eCompType_DXT1) return DXTC::CompressImageDXT1SSE2;
        if(kind == eCompType_DXT5) return DXTC::CompressImageDXT5SSE2;
        if(kind == eCompType_BC7) return BC7C::CompressImageBC7SIMD;
    }
    else if(isa == eInstrSet_Scalar) {
        if(kind == eCompType_DXT1) return DXTC::CompressImageDXT1;
        if(kind == eCompType_DXT5) return DXTC::CompressImageDXT5;
        if(kind == eCompType_BC7) return BC7C::CompressImageBC7;
    }
#ifdef ENABLE_AVX2
    else if(isa == eInstrSet_AVX2) {
        if(kind == eCompType_DXT1) return DXTC::CompressImageDXT1AVX2;
        if(kind == eCompType_DXT5) return DXTC::CompressImageDXT5AVX2;
    }
#endif

    // No routine for this combination.
    return NULL;
}
void CompressImageDXTNoThread(const BYTE* inBuf, BYTE* outBuf, INT width, INT height) {
CompressionFunc cmpFunc = GetCompressionFunc();
if(cmpFunc == NULL) {
OutputDebugString(L"DXTC::CompressImageDXTNoThread -- Compression Scheme not implemented!\n");
return;
}
// Do the compression.
(*cmpFunc)(inBuf, outBuf, width, height);
}
// Use the TBB task manager to compress an image with DXT compression.
//
// inBuf / outBuf  source texels and destination for the compressed blocks.
// width / height  image dimensions in pixels.
//
// The image is split into tasks of gBlocksPerTask blocks each (a value below 1
// is treated as 1). A block covers 16 pixels, or 32 for the AVX2 instruction
// set. Task routines exist for DXT1 and DXT5 only; for any other combination
// (for example BC7) a message is written to the debugger output and nothing is
// compressed, instead of handing a NULL task function to the task manager.
VOID CompressImageDXTTBB(const BYTE* inBuf, BYTE* outBuf, INT width, INT height)
{
    // Pick the task routine for the current scheme.
    TASKSETFUNC taskFunc = NULL;
    switch(gCompressionScheme->instrSet)
    {
        case eInstrSet_SSE:
        {
            switch(gCompressionScheme->type) {
                case eCompType_DXT1: taskFunc = DXTC::CompressImageDXT1SSE2Task; break;
                case eCompType_DXT5: taskFunc = DXTC::CompressImageDXT5SSE2Task; break;
                default: break;
            }
        }
        break;
        case eInstrSet_Scalar:
        {
            switch(gCompressionScheme->type) {
                case eCompType_DXT1: taskFunc = DXTC::CompressImageDXT1Task; break;
                case eCompType_DXT5: taskFunc = DXTC::CompressImageDXT5Task; break;
                default: break;
            }
        }
        break;
#ifdef ENABLE_AVX2
        case eInstrSet_AVX2:
        {
            switch(gCompressionScheme->type) {
                case eCompType_DXT1: taskFunc = DXTC::CompressImageDXT1AVX2Task; break;
                case eCompType_DXT5: taskFunc = DXTC::CompressImageDXT5AVX2Task; break;
                default: break;
            }
        }
        break;
#endif
        default:
            break;
    }

    if(taskFunc == NULL) {
        OutputDebugString(L"DXTC::CompressImageDXTTBB -- Compression Scheme not implemented!\n");
        return;
    }

    // Guard against a zero/negative task size (would divide by zero below).
    int blocksPerTask = (int)gBlocksPerTask;
    if(blocksPerTask < 1) {
        blocksPerTask = 1;
    }

    // Initialize the data.
    const int pixelsPerBlock = (gCompressionScheme->instrSet == eInstrSet_AVX2) ? 32 : 16;
    DXTTaskData data;
    data.inBuf = inBuf;
    data.outBuf = outBuf;
    data.width = width;
    data.height = height;
    data.numBlocks = width * height / pixelsPerBlock;
    data.kBlocksPerTask = blocksPerTask;

    // Compute the task count (rounded up so a partial last task is included).
    UINT taskCount = (UINT)ceil((float)data.numBlocks / blocksPerTask);

    // Create the task set.
    TASKSETHANDLE taskSet = TASKSETHANDLE_INVALID;
    gTaskMgr.CreateTaskSet(taskFunc, &data, taskCount, NULL, 0, "Fast Texture Compression", &taskSet);
    if(taskSet == TASKSETHANDLE_INVALID)
    {
        return;
    }

    // Wait for the task set.
    gTaskMgr.WaitForSet(taskSet);

    // Release the task set.
    gTaskMgr.ReleaseHandle(taskSet);
    taskSet = TASKSETHANDLE_INVALID;
}
// Returns 2 when the active scheme uses the AVX2 instruction set and 1 for
// every other instruction set.
int GetBlocksPerLoop() {
  const bool usesAVX2 = (gCompressionScheme->instrSet == eInstrSet_AVX2);
  return usesAVX2 ? 2 : 1;
}
// Returns the per-block byte count used for the active compressor type:
// 16 for DXT5 and BC7, 8 for DXT1 and for any unrecognised type.
int GetBytesPerBlock() {
  const bool isSixteenByteFormat =
    gCompressionScheme->type == eCompType_DXT5 ||
    gCompressionScheme->type == eCompType_BC7;

  if(isSixteenByteFormat) {
    return 16;
  }

  // DXT1, and the fallback for anything else.
  return 8;
}
// Compress an image by splitting it evenly across the Win32 worker threads.
//
// Each of the gNumWinThreads entries of gWinThreadData is loaded with its
// own input/output window and the compression routine returned by
// GetCompressionFunc(). The workers are then released through
// gWinThreadStartEvent and the call blocks until every handle in
// gWinThreadWorkEvent has been signalled (see CompressImageDXTWinThread).
//
//   inBuf / outBuf  - source and destination buffers for the whole image.
//   width / height  - image dimensions in pixels.
//
// If no routine exists for the active scheme a debug message is emitted and
// nothing is compressed. If the image cannot be split (no worker threads,
// a width too small to hold one block group, or fewer blocks than threads)
// the work is done on the calling thread via CompressImageDXTNoThread.
//
// NOTE(review): the split only uses kNumBlocks / numThreads, so when the
// block count is not a multiple of the thread count the remainder does not
// appear to be assigned to any thread -- confirm against callers.
VOID CompressImageDXTWIN(const BYTE* inBuf, BYTE* outBuf, INT width, INT height) {

  CompressionFunc cmpFunc = GetCompressionFunc();
  if(cmpFunc == NULL) {
    OutputDebugString(L"DXTC::CompressImageDXTWIN -- Compression Scheme not implemented!\n");
    return;
  }

  const int numThreads = gNumWinThreads;
  const int blocksPerLoop = GetBlocksPerLoop();
  const int bytesPerBlock = GetBytesPerBlock();

  // We want to split the data evenly among all threads.
  const int kNumPixels = width * height;
  const int kNumBlocks = kNumPixels >> (3 + blocksPerLoop);
  const int kBlocksPerRow = width >> (1 + blocksPerLoop);

  // Both values are used as divisors below; without this guard a zero thread
  // count or a very narrow image would divide by zero.
  if(numThreads < 1 || kBlocksPerRow < 1) {
    CompressImageDXTNoThread(inBuf, outBuf, width, height);
    return;
  }

  const int kBlocksPerThread = kNumBlocks / numThreads;

  // Fewer blocks than threads: every worker would receive a zero-height
  // window and the output would never be written.
  if(kBlocksPerThread < 1) {
    CompressImageDXTNoThread(inBuf, outBuf, width, height);
    return;
  }

  const int kBlockRowsPerThread = kBlocksPerThread / kBlocksPerRow;
  const int kBlockColsPerThread = kBlocksPerThread % kBlocksPerRow;

  // Byte distance between the input windows of two consecutive threads.
  const int kOffsetPerThread = kBlockRowsPerThread * width * 4 * 4 + kBlockColsPerThread * 4 * 4 * (blocksPerLoop);

  // Height, in pixel rows, handed to each thread's compression call.
  const int kHeightPerThread = (blocksPerLoop * 16 * kBlocksPerThread) / width;

  // Load the threads.
  for(int threadIdx = 0; threadIdx < numThreads; threadIdx++) {
    WinThreadData *data = &gWinThreadData[threadIdx];
    data->inBuf = inBuf + (threadIdx * kOffsetPerThread);
    data->outBuf = outBuf + (threadIdx * kBlocksPerThread * blocksPerLoop * bytesPerBlock);
    data->width = width;
    data->height = kHeightPerThread;
    data->cmpFunc = cmpFunc;
    data->state = eThreadState_DataLoaded;
    data->threadIdx = threadIdx;
  }

  // Send the event for the threads to start.
  CHECK_WIN_THREAD_FUNC(ResetEvent(gWinThreadDoneEvent));
  CHECK_WIN_THREAD_FUNC(SetEvent(gWinThreadStartEvent));

  // Wait for all the threads to finish
  if(WAIT_FAILED == WaitForMultipleObjects(numThreads, gWinThreadWorkEvent, TRUE, INFINITE))
    ReportWinThreadError(TEXT("CompressImageDXTWIN -- WaitForMultipleObjects"));

  // Reset the start event
  CHECK_WIN_THREAD_FUNC(ResetEvent(gWinThreadStartEvent));
  CHECK_WIN_THREAD_FUNC(SetEvent(gWinThreadDoneEvent));
}
// Thread procedure for one Win32 compression worker.
//
// lpParam points at this worker's WinThreadData slot. Each pass of the loop
// blocks on gWinThreadStartEvent, runs the compression routine stored in the
// slot over the slot's buffers, then signals the worker's own entry in
// gWinThreadWorkEvent and blocks on gWinThreadDoneEvent.
//
// The loop ends, and the procedure returns 0, once the slot's state reads
// eThreadState_Done; that is checked both before waiting and again right
// after waking up. Failed waits are reported through ReportWinThreadError.
DWORD WINAPI CompressImageDXTWinThread( LPVOID lpParam ) {

  WinThreadData *slot = (WinThreadData *)lpParam;

  while(slot->state != eThreadState_Done) {

    // Block until the start event is signalled.
    if(WaitForSingleObject(gWinThreadStartEvent, INFINITE) == WAIT_FAILED) {
      ReportWinThreadError(TEXT("CompressImageDXTWinThread -- WaitForSingleObject"));
    }

    // The state is re-read here because it may have changed while blocked.
    if(slot->state == eThreadState_Done) {
      break;
    }

    // Compress this worker's share of the image.
    slot->state = eThreadState_Running;
    slot->cmpFunc(slot->inBuf, slot->outBuf, slot->width, slot->height);
    slot->state = eThreadState_WaitForData;

    // Signal this worker's event, then wait on the shared done event before
    // starting the next pass.
    HANDLE finishedEvent = gWinThreadWorkEvent[slot->threadIdx];
    if(SignalObjectAndWait(finishedEvent, gWinThreadDoneEvent, INFINITE, FALSE) == WAIT_FAILED) {
      ReportWinThreadError(TEXT("CompressImageDXTWinThread -- SignalObjectAndWait"));
    }
  }

  return 0;
}
}