Fixed bug 4290 - add fastpaths for format conversion in BlitNtoN

All of the following conversions are faster (no colorkey, no blending).
(The ratios are approximate.)

ABGR8888 -> ARGB8888 :  faster x6   (2655837 -> 416607)
ABGR8888 -> BGR24 :  faster x7   (2470117 -> 325693)
ABGR8888 -> RGB24 :  faster x7   (2478107 -> 335445)
ABGR8888 -> RGB888 :  faster x9   (3178524 -> 333859)

ARGB8888 -> ABGR8888 :  faster x6   (2648366 -> 406977)
ARGB8888 -> BGR24 :  faster x7   (2474978 -> 327819)
ARGB8888 -> BGR888 :  faster x9   (3189072 -> 326710)
ARGB8888 -> RGB24 :  faster x7   (2473689 -> 324729)

BGR24 -> ABGR8888 :  faster x6   (2268763 -> 359946)
BGR24 -> ARGB8888 :  faster x6   (2306393 -> 359213)
BGR24 -> BGR888 :  faster x6   (2231141 -> 324195)
BGR24 -> RGB24 :  faster x4   (1557835 -> 322033)
BGR24 -> RGB888 :  faster x6   (2229854 -> 323849)

BGR888 -> ARGB8888 :  faster x8   (3215202 -> 363137)
BGR888 -> BGR24 :  faster x7   (2474775 -> 347916)
BGR888 -> RGB24 :  faster x7   (2532783 -> 327354)
BGR888 -> RGB888 :  faster x9   (3134634 -> 344987)

RGB24 -> ABGR8888 :  faster x6   (2229486 -> 358919)
RGB24 -> ARGB8888 :  faster x6   (2271587 -> 358521)
RGB24 -> BGR24 :  faster x4   (1530913 -> 321149)
RGB24 -> BGR888 :  faster x6   (2227284 -> 327453)
RGB24 -> RGB888 :  faster x6   (2227125 -> 329061)

RGB888 -> ABGR8888 :  faster x8   (3163292 -> 362445)
RGB888 -> BGR24 :  faster x7   (2469489 -> 327127)
RGB888 -> BGR888 :  faster x9   (3190526 -> 326022)
RGB888 -> RGB24 :  faster x7   (2479084 -> 324982)
This commit is contained in:
Sylvain Becker 2019-01-30 15:23:33 +01:00
parent 3b4e369365
commit 1128d57316

View file

@ -2515,6 +2515,146 @@ BlitNto2101010(SDL_BlitInfo * info)
}
}
/*
 * Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet
 *
 * Converts between 3- and 4-byte-per-pixel formats whose three colour
 * bytes sit at the same byte offsets in source and destination, so the
 * colour bytes are carried over unchanged.
 *
 * info supplies the source/destination pointers, the per-row skips, the
 * destination width/height, both pixel formats and the constant alpha
 * (info->a) stored when the destination format has an alpha mask.
 *
 * The colour bytes are read at byte offsets 0..2, which matches the pixel
 * layout only on little-endian hosts; the table entries that select this
 * blitter are all inside SDL_BYTEORDER == SDL_LIL_ENDIAN guards.
 */
static void
Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->BytesPerPixel;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->BytesPerPixel;

    if (dstfmt->Amask) {
        /* SET_ALPHA */
        /* Kept unsigned: an alpha of 0x80 or more does not fit in a signed
           int once it has been moved to bits 24..31. */
        const Uint32 alphashift = (Uint32) info->a << 24;

        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint32 *dst32 = (Uint32*)dst;
                Uint32 s0 = src[0];
                Uint32 s1 = src[1];
                Uint32 s2 = src[2];
                *dst32 = (s0) | (s1 << 8) | (s2 << 16) | alphashift;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    } else if (dstbpp == 4) {
        /* NO_ALPHA, 4-byte destination: one 32-bit store, top byte zero */
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint32 *dst32 = (Uint32*)dst;
                Uint32 s0 = src[0];
                Uint32 s1 = src[1];
                Uint32 s2 = src[2];
                *dst32 = (s0) | (s1 << 8) | (s2 << 16);
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    } else {
        /* NO_ALPHA, 3-byte destination: store exactly three bytes.  A
           32-bit store here would also write the byte that follows the
           pixel: the next pixel, the row padding, or memory past the end
           of the buffer on the very last pixel. */
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint8 s0 = src[0];
                Uint8 s1 = src[1];
                Uint8 s2 = src[2];
                dst[0] = s0;
                dst[1] = s1;
                dst[2] = s2;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    }
}
/*
 * Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet
 *
 * Converts between 3- and 4-byte-per-pixel formats whose first and third
 * colour channels are swapped between source and destination (the middle
 * channel stays in place).
 *
 * Alpha handling depends on the two formats:
 *   - destination and source both have an alpha mask: the source alpha
 *     byte is copied (COPY_ALPHA);
 *   - only the destination has one: the constant info->a is stored
 *     (SET_ALPHA);
 *   - destination has none: only the colour bytes are written (NO_ALPHA).
 *
 * info supplies the source/destination pointers, the per-row skips, the
 * destination width/height and both pixel formats.
 */
static void
Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->BytesPerPixel;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->BytesPerPixel;

    /*
     * Byte offsets inside one pixel:
     *   i0, i1, i2 - source bytes holding bits 0..7, 8..15 and 16..23
     *   i3         - source byte holding bits 24..31 (4-byte source only)
     *   j0, j1, j2 - bytes of a 3-byte destination holding bits 0..7,
     *                8..15 and 16..23
     * The 4->4 table entries that select this blitter are not restricted
     * to little-endian hosts, so the offsets follow the host byte order.
     * NOTE(review): 3-byte formats are only registered for little-endian;
     * the big-endian 3-byte offsets assume the low byte is stored last,
     * as for 4-byte pixels -- confirm before enabling such entries.
     */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    const int i0 = 0;
    const int i1 = 1;
    const int i2 = 2;
    const int i3 = 3;
    const int j0 = 0;
    const int j1 = 1;
    const int j2 = 2;
#else
    const int i0 = srcbpp - 1;
    const int i1 = srcbpp - 2;
    const int i2 = srcbpp - 3;
    const int i3 = 0;
    const int j0 = 2;
    const int j1 = 1;
    const int j2 = 0;
#endif

    if (dstfmt->Amask) {
        if (srcfmt->Amask) {
            /* COPY_ALPHA */
            /* Only to switch ABGR8888 <-> ARGB8888 */
            while (height--) {
                /* *INDENT-OFF* */
                DUFFS_LOOP(
                {
                    Uint32 *dst32 = (Uint32*)dst;
                    Uint32 s0 = src[i0];
                    Uint32 s1 = src[i1];
                    Uint32 s2 = src[i2];
                    /* widen before shifting: shifting the promoted int
                       is undefined once the alpha byte is 0x80 or more */
                    Uint32 alphashift = (Uint32) src[i3] << 24;
                    /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
                    dst += dstbpp;
                    src += srcbpp;
                }, width);
                /* *INDENT-ON* */
                src += srcskip;
                dst += dstskip;
            }
        } else {
            /* SET_ALPHA */
            const Uint32 alphashift = (Uint32) info->a << 24;

            while (height--) {
                /* *INDENT-OFF* */
                DUFFS_LOOP(
                {
                    Uint32 *dst32 = (Uint32*)dst;
                    Uint32 s0 = src[i0];
                    Uint32 s1 = src[i1];
                    Uint32 s2 = src[i2];
                    /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
                    dst += dstbpp;
                    src += srcbpp;
                }, width);
                /* *INDENT-ON* */
                src += srcskip;
                dst += dstskip;
            }
        }
    } else if (dstbpp == 4) {
        /* NO_ALPHA, 4-byte destination: one 32-bit store, top byte zero */
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint32 *dst32 = (Uint32*)dst;
                Uint32 s0 = src[i0];
                Uint32 s1 = src[i1];
                Uint32 s2 = src[i2];
                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
                *dst32 = (s0 << 16) | (s1 << 8) | (s2);
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    } else {
        /* NO_ALPHA, 3-byte destination: store exactly three bytes.  A
           32-bit store here would also write the byte that follows the
           pixel: the next pixel, the row padding, or memory past the end
           of the buffer on the very last pixel. */
        while (height--) {
            /* *INDENT-OFF* */
            DUFFS_LOOP(
            {
                Uint8 s0 = src[i0];
                Uint8 s1 = src[i1];
                Uint8 s2 = src[i2];
                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
                dst[j2] = s0;
                dst[j1] = s1;
                dst[j0] = s2;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */
            src += srcskip;
            dst += dstskip;
        }
    }
}
/* Normal N to N optimized blitters */
#define NO_ALPHA 1
#define SET_ALPHA 2
@ -2555,6 +2695,23 @@ static const struct blit_table normal_blit_2[] = {
};
/*
 * Blitter lookup table for 3-byte-per-pixel sources.
 *
 * NOTE(review): struct blit_table is declared outside this view; judging
 * by the entries, each row presumably reads
 *   { src mask, src mask, src mask, dst bytes per pixel,
 *     dst mask, dst mask, dst mask, <unnamed 0 field>,
 *     blit function, alpha modes the function handles }
 * -- confirm against the struct declaration.
 */
static const struct blit_table normal_blit_3[] = {
/* The fast paths below are only listed on little-endian hosts. */
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
/* 3->4 with same rgb triplet: source and destination masks are identical */
{0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
/* 3->4 with inversed rgb triplet: first and third masks are swapped */
{0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
/* 3->3 to switch RGB 24 <-> BGR 24 (NO_ALPHA only) */
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
{0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
#endif
/* Default for 24-bit RGB source, never optimized */
/* All-zero final row selecting the generic BlitNtoN; it must stay last. */
{0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
};
@ -2571,6 +2728,24 @@ static const struct blit_table normal_blit_4[] = {
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
#endif
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
/* 4->3 with same rgb triplet */
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
/* 4->3 with inversed rgb triplet */
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
#endif
/* 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 */
{0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA},
/* RGB 888 and RGB 565 */
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
0, Blit_RGB888_RGB565, NO_ALPHA},
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
@ -2623,7 +2798,7 @@ SDL_CalculateBlitN(SDL_Surface * surface)
}
} else {
/* Now the meat, choose the blitter we want */
int a_need = NO_ALPHA;
Uint32 a_need = NO_ALPHA;
if (dstfmt->Amask)
a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
table = normal_blit[srcfmt->BytesPerPixel - 1];