mirror of
https://github.com/Ryujinx/SDL.git
synced 2024-12-24 13:15:40 +00:00
Working on bug 3921 - Add some Fastpath to BlitNtoNKey and BlitNtoNKeyCopyAlpha
Sylvain: I did various benchmarks with clang 6.0.0 on Linux and ndk-r16b on Android (NDK_TOOLCHAIN_VERSION=clang). - I still see a x10 speed factor. - With duff_loops, it does not use vectorisation (but that doesn't seem to be a problem). On Linux my patch is already at full speed at -O2, whereas the duff_loops need -O3 (200 ms at -O3, and 300 ms at -O2). I realized that on Android I had a slight variation which fits best, both on Linux with -O2 and -O3, and on Android with -O2/-O3 and armeabi-v7a/arm64. Here's the patch.
This commit is contained in:
parent
922623e1b6
commit
6e35e42145
|
@ -2344,8 +2344,9 @@ BlitNtoNKey(SDL_BlitInfo * info)
|
||||||
/* *INDENT-OFF* */
|
/* *INDENT-OFF* */
|
||||||
DUFFS_LOOP(
|
DUFFS_LOOP(
|
||||||
{
|
{
|
||||||
Uint32 Pixel = (*src32 == ckey) ? *dst32 : *src32;
|
if (*src32 != ckey) {
|
||||||
*dst32 = Pixel;
|
*dst32 = *src32;
|
||||||
|
}
|
||||||
++src32;
|
++src32;
|
||||||
++dst32;
|
++dst32;
|
||||||
},
|
},
|
||||||
|
@ -2418,8 +2419,9 @@ BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
|
||||||
/* *INDENT-OFF* */
|
/* *INDENT-OFF* */
|
||||||
DUFFS_LOOP(
|
DUFFS_LOOP(
|
||||||
{
|
{
|
||||||
Uint32 Pixel_ = ((*src32 & rgbmask) == ckey) ? *dst32 : *src32;
|
if ((*src32 & rgbmask) != ckey) {
|
||||||
*dst32 = Pixel_;
|
*dst32 = *src32;
|
||||||
|
}
|
||||||
++src32;
|
++src32;
|
||||||
++dst32;
|
++dst32;
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in a new issue