mirror of
https://github.com/Ryujinx/SDL.git
synced 2025-01-11 02:05:34 +00:00
cpuinfo: Added some internal SIMD-aligned allocation functions.
Fixes Bugzilla #4150 (sort of).
This commit is contained in:
parent
999af8099b
commit
8543ad7df1
|
@ -169,7 +169,6 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasNEON(void);
|
||||||
*/
|
*/
|
||||||
extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void);
|
extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void);
|
||||||
|
|
||||||
|
|
||||||
/* Ends C function definitions when using C++ */
|
/* Ends C function definitions when using C++ */
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
/* CPU feature detection for SDL */
|
/* CPU feature detection for SDL */
|
||||||
|
|
||||||
#include "SDL_cpuinfo.h"
|
#include "SDL_cpuinfo.h"
|
||||||
|
#include "SDL_assert.h"
|
||||||
|
|
||||||
#ifdef HAVE_SYSCONF
|
#ifdef HAVE_SYSCONF
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
@ -571,6 +572,7 @@ SDL_GetCPUCacheLineSize(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
|
static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
|
||||||
|
static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
|
||||||
|
|
||||||
static Uint32
|
static Uint32
|
||||||
SDL_GetCPUFeatures(void)
|
SDL_GetCPUFeatures(void)
|
||||||
|
@ -578,41 +580,53 @@ SDL_GetCPUFeatures(void)
|
||||||
if (SDL_CPUFeatures == 0xFFFFFFFF) {
|
if (SDL_CPUFeatures == 0xFFFFFFFF) {
|
||||||
CPU_calcCPUIDFeatures();
|
CPU_calcCPUIDFeatures();
|
||||||
SDL_CPUFeatures = 0;
|
SDL_CPUFeatures = 0;
|
||||||
|
SDL_SIMDAlignment = 4; /* a good safe base value */
|
||||||
if (CPU_haveRDTSC()) {
|
if (CPU_haveRDTSC()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_RDTSC;
|
SDL_CPUFeatures |= CPU_HAS_RDTSC;
|
||||||
}
|
}
|
||||||
if (CPU_haveAltiVec()) {
|
if (CPU_haveAltiVec()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
|
SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
|
||||||
}
|
}
|
||||||
if (CPU_haveMMX()) {
|
if (CPU_haveMMX()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_MMX;
|
SDL_CPUFeatures |= CPU_HAS_MMX;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
|
||||||
}
|
}
|
||||||
if (CPU_have3DNow()) {
|
if (CPU_have3DNow()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_3DNOW;
|
SDL_CPUFeatures |= CPU_HAS_3DNOW;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
|
||||||
}
|
}
|
||||||
if (CPU_haveSSE()) {
|
if (CPU_haveSSE()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_SSE;
|
SDL_CPUFeatures |= CPU_HAS_SSE;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
|
||||||
}
|
}
|
||||||
if (CPU_haveSSE2()) {
|
if (CPU_haveSSE2()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_SSE2;
|
SDL_CPUFeatures |= CPU_HAS_SSE2;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
|
||||||
}
|
}
|
||||||
if (CPU_haveSSE3()) {
|
if (CPU_haveSSE3()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_SSE3;
|
SDL_CPUFeatures |= CPU_HAS_SSE3;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
|
||||||
}
|
}
|
||||||
if (CPU_haveSSE41()) {
|
if (CPU_haveSSE41()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_SSE41;
|
SDL_CPUFeatures |= CPU_HAS_SSE41;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
|
||||||
}
|
}
|
||||||
if (CPU_haveSSE42()) {
|
if (CPU_haveSSE42()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_SSE42;
|
SDL_CPUFeatures |= CPU_HAS_SSE42;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
|
||||||
}
|
}
|
||||||
if (CPU_haveAVX()) {
|
if (CPU_haveAVX()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_AVX;
|
SDL_CPUFeatures |= CPU_HAS_AVX;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
|
||||||
}
|
}
|
||||||
if (CPU_haveAVX2()) {
|
if (CPU_haveAVX2()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_AVX2;
|
SDL_CPUFeatures |= CPU_HAS_AVX2;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
|
||||||
}
|
}
|
||||||
if (CPU_haveNEON()) {
|
if (CPU_haveNEON()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_NEON;
|
SDL_CPUFeatures |= CPU_HAS_NEON;
|
||||||
|
SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return SDL_CPUFeatures;
|
return SDL_CPUFeatures;
|
||||||
|
@ -745,6 +759,44 @@ SDL_GetSystemRAM(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t
|
||||||
|
SDL_SIMDGetAlignment(void)
|
||||||
|
{
|
||||||
|
if (SDL_SIMDAlignment == 0xFFFFFFFF) {
|
||||||
|
SDL_GetCPUFeatures(); /* make sure this has been calculated */
|
||||||
|
}
|
||||||
|
SDL_assert(SDL_SIMDAlignment != 0);
|
||||||
|
return SDL_SIMDAlignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
SDL_SIMDAlloc(const size_t len)
|
||||||
|
{
|
||||||
|
const size_t alignment = SDL_SIMDGetAlignment();
|
||||||
|
const size_t padding = alignment - (len % alignment);
|
||||||
|
const size_t padded = (padding != alignment) ? (len + padding) : len;
|
||||||
|
Uint8 *retval = NULL;
|
||||||
|
Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
|
||||||
|
if (ptr) {
|
||||||
|
/* store the actual malloc pointer right before our aligned pointer. */
|
||||||
|
retval = ptr + sizeof (void *);
|
||||||
|
retval += alignment - (((size_t) retval) % alignment);
|
||||||
|
*(((void **) retval) - 1) = ptr;
|
||||||
|
}
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Release a block obtained from SDL_SIMDAlloc().
 *
 * The pointer-sized slot directly below `ptr` is read and its contents are
 * passed to SDL_free(). Passing NULL does nothing.
 *
 * \param ptr  Pointer previously returned by SDL_SIMDAlloc(), or NULL.
 */
void
SDL_SIMDFree(void *ptr)
{
    void **slot;

    if (ptr == NULL) {
        return;
    }

    /* The value to free lives one pointer-width before the given address. */
    slot = ((void **) ptr) - 1;
    SDL_free(*slot);
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef TEST_MAIN
|
#ifdef TEST_MAIN
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
88
src/cpuinfo/SDL_simd.h
Normal file
88
src/cpuinfo/SDL_simd.h
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
Simple DirectMedia Layer
|
||||||
|
Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "SDL.h"
|
||||||
|
#include "../SDL_internal.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Report the alignment this system needs for SIMD allocations.
|
||||||
|
*
|
||||||
|
* This will return the minimum number of bytes to which a pointer must be
|
||||||
|
* aligned to be compatible with SIMD instructions on the current machine.
|
||||||
|
* For example, if the machine supports SSE only, it will return 16, but if
|
||||||
|
* it supports AVX-512F, it'll return 64 (etc). This only reports values for
|
||||||
|
* instruction sets SDL knows about, so if your SDL build doesn't have
|
||||||
|
* SDL_HasAVX512F(), then it might return 16 for the SSE support it sees and
|
||||||
|
* not 64 for the AVX-512 instructions that exist but SDL doesn't know about.
|
||||||
|
* Plan accordingly.
|
||||||
|
*/
|
||||||
|
extern size_t SDL_SIMDGetAlignment(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Allocate memory in a SIMD-friendly way.
|
||||||
|
*
|
||||||
|
* This will allocate a block of memory that is suitable for use with SIMD
|
||||||
|
* instructions. Specifically, it will be properly aligned and padded for
|
||||||
|
* the system's supported vector instructions.
|
||||||
|
*
|
||||||
|
* The memory returned will be padded such that it is safe to read or write
|
||||||
|
* an incomplete vector at the end of the memory block. This can be useful
|
||||||
|
* so you don't have to drop back to a scalar fallback at the end of your
|
||||||
|
* SIMD processing loop to deal with the final elements without overflowing
|
||||||
|
* the allocated buffer.
|
||||||
|
*
|
||||||
|
* You must free this memory with SDL_SIMDFree(), not free() or SDL_free()
|
||||||
|
* or delete[], etc.
|
||||||
|
*
|
||||||
|
* Note that SDL will only deal with SIMD instruction sets it is aware of;
|
||||||
|
* for example, SDL 2.0.8 knows that SSE wants 16-byte vectors
|
||||||
|
* (SDL_HasSSE()), and AVX2 wants 32 bytes (SDL_HasAVX2()), but doesn't
|
||||||
|
* know that AVX-512 wants 64. To be clear: if you can't decide to use an
|
||||||
|
* instruction set with an SDL_Has*() function, don't use that instruction
|
||||||
|
* set with memory allocated through here.
|
||||||
|
*
|
||||||
|
* SDL_SIMDAlloc(0) will return a non-NULL pointer, assuming the system isn't
|
||||||
|
* out of memory.
|
||||||
|
*
|
||||||
|
* \param len The length, in bytes, of the block to allocate. The actual
|
||||||
|
* allocated block might be larger due to padding, etc.
|
||||||
|
* \return Pointer to newly-allocated block, NULL if out of memory.
|
||||||
|
*
|
||||||
|
* \sa SDL_SIMDGetAlignment
|
||||||
|
* \sa SDL_SIMDFree
|
||||||
|
*/
|
||||||
|
extern void * SDL_SIMDAlloc(const size_t len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Deallocate memory obtained from SDL_SIMDAlloc
|
||||||
|
*
|
||||||
|
* It is not valid to use this function on a pointer from anything but
|
||||||
|
* SDL_SIMDAlloc(). It can't be used on pointers from malloc, realloc,
|
||||||
|
* SDL_malloc, memalign, new[], etc.
|
||||||
|
*
|
||||||
|
* However, SDL_SIMDFree(NULL) is a legal no-op.
|
||||||
|
*
|
||||||
|
* \sa SDL_SIMDAlloc
|
||||||
|
*/
|
||||||
|
extern void SDL_SIMDFree(void *ptr);
|
||||||
|
|
||||||
|
/* vi: set ts=4 sw=4 expandtab: */
|
||||||
|
|
Loading…
Reference in a new issue