mirror of
https://github.com/Ryujinx/SDL.git
synced 2025-01-25 19:31:06 +00:00
ARM: assembly optimization for SDL_FillRect
This commit is contained in:
parent
7ac733f025
commit
becc649ae2
|
@ -22,6 +22,7 @@
|
||||||
|
|
||||||
#include "SDL_video.h"
|
#include "SDL_video.h"
|
||||||
#include "SDL_blit.h"
|
#include "SDL_blit.h"
|
||||||
|
#include "SDL_cpuinfo.h"
|
||||||
|
|
||||||
|
|
||||||
#ifdef __SSE__
|
#ifdef __SSE__
|
||||||
|
@ -280,6 +281,28 @@ SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
|
||||||
return SDL_SetError("SDL_FillRects() passed NULL rects");
|
return SDL_SetError("SDL_FillRects() passed NULL rects");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if SDL_ARM_SIMD_BLITTERS
|
||||||
|
if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3) {
|
||||||
|
void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
|
||||||
|
void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
|
||||||
|
void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
|
||||||
|
switch (dst->format->BytesPerPixel) {
|
||||||
|
case 1:
|
||||||
|
FillRect8ARMSIMDAsm(rect->w, rect->h, (uint8_t *) pixels, dst->pitch >> 0, color);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
FillRect16ARMSIMDAsm(rect->w, rect->h, (uint16_t *) pixels, dst->pitch >> 1, color);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
FillRect32ARMSIMDAsm(rect->w, rect->h, (uint32_t *) pixels, dst->pitch >> 2, color);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
SDL_UnlockSurface(dst);
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
switch (dst->format->BytesPerPixel) {
|
switch (dst->format->BytesPerPixel) {
|
||||||
case 1:
|
case 1:
|
||||||
{
|
{
|
||||||
|
|
|
@ -47,6 +47,74 @@
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
|
.macro FillRect32_init
|
||||||
|
ldr SRC, [sp, #ARGS_STACK_OFFSET]
|
||||||
|
mov STRIDE_S, SRC
|
||||||
|
mov MASK, SRC
|
||||||
|
mov STRIDE_M, SRC
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro FillRect16_init
|
||||||
|
ldrh SRC, [sp, #ARGS_STACK_OFFSET]
|
||||||
|
orr SRC, SRC, lsl #16
|
||||||
|
mov STRIDE_S, SRC
|
||||||
|
mov MASK, SRC
|
||||||
|
mov STRIDE_M, SRC
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro FillRect8_init
|
||||||
|
ldrb SRC, [sp, #ARGS_STACK_OFFSET]
|
||||||
|
orr SRC, SRC, lsl #8
|
||||||
|
orr SRC, SRC, lsl #16
|
||||||
|
mov STRIDE_S, SRC
|
||||||
|
mov MASK, SRC
|
||||||
|
mov STRIDE_M, SRC
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro FillRect_process_tail cond, numbytes, firstreg
|
||||||
|
WK4 .req SRC
|
||||||
|
WK5 .req STRIDE_S
|
||||||
|
WK6 .req MASK
|
||||||
|
WK7 .req STRIDE_M
|
||||||
|
pixst cond, numbytes, 4, DST
|
||||||
|
.unreq WK4
|
||||||
|
.unreq WK5
|
||||||
|
.unreq WK6
|
||||||
|
.unreq WK7
|
||||||
|
.endm
|
||||||
|
|
||||||
|
generate_composite_function \
|
||||||
|
FillRect32ARMSIMDAsm, 0, 0, 32, \
|
||||||
|
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
|
||||||
|
0, /* prefetch distance doesn't apply */ \
|
||||||
|
FillRect32_init \
|
||||||
|
nop_macro, /* newline */ \
|
||||||
|
nop_macro /* cleanup */ \
|
||||||
|
nop_macro /* process head */ \
|
||||||
|
FillRect_process_tail
|
||||||
|
|
||||||
|
generate_composite_function \
|
||||||
|
FillRect16ARMSIMDAsm, 0, 0, 16, \
|
||||||
|
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
|
||||||
|
0, /* prefetch distance doesn't apply */ \
|
||||||
|
FillRect16_init \
|
||||||
|
nop_macro, /* newline */ \
|
||||||
|
nop_macro /* cleanup */ \
|
||||||
|
nop_macro /* process head */ \
|
||||||
|
FillRect_process_tail
|
||||||
|
|
||||||
|
generate_composite_function \
|
||||||
|
FillRect8ARMSIMDAsm, 0, 0, 8, \
|
||||||
|
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
|
||||||
|
0, /* prefetch distance doesn't apply */ \
|
||||||
|
FillRect8_init \
|
||||||
|
nop_macro, /* newline */ \
|
||||||
|
nop_macro /* cleanup */ \
|
||||||
|
nop_macro /* process head */ \
|
||||||
|
FillRect_process_tail
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
/* This differs from the over_8888_8888 routine in Pixman in that the destination
|
/* This differs from the over_8888_8888 routine in Pixman in that the destination
|
||||||
* alpha component is always left unchanged, and RGB components are not
|
* alpha component is always left unchanged, and RGB components are not
|
||||||
* premultiplied by alpha. It differs from BlitRGBtoRGBPixelAlpha in that
|
* premultiplied by alpha. It differs from BlitRGBtoRGBPixelAlpha in that
|
||||||
|
|
Loading…
Reference in a new issue