diff --git a/Makefile.os2 b/Makefile.os2
index a063d683e..c6ab898b0 100644
--- a/Makefile.os2
+++ b/Makefile.os2
@@ -81,7 +81,7 @@ SRCS+= SDL_events.c SDL_quit.c SDL_keyboard.c SDL_mouse.c SDL_windowevents.c &
SDL_clipboardevents.c SDL_dropevents.c SDL_displayevents.c SDL_gesture.c &
SDL_sensor.c SDL_touch.c
SRCS+= SDL_haptic.c SDL_hidapi.c SDL_gamecontroller.c SDL_joystick.c controller_type.c
-SRCS+= SDL_render.c yuv_rgb.c SDL_yuv.c SDL_yuv_sw.c SDL_blendfillrect.c &
+SRCS+= SDL_render.c yuv_rgb_lsx.c yuv_rgb_sse.c yuv_rgb_std.c SDL_yuv.c SDL_yuv_sw.c SDL_blendfillrect.c &
SDL_blendline.c SDL_blendpoint.c SDL_drawline.c SDL_drawpoint.c &
SDL_render_sw.c SDL_rotate.c SDL_triangle.c
SRCS+= SDL_blit.c SDL_blit_0.c SDL_blit_1.c SDL_blit_A.c SDL_blit_auto.c &
@@ -158,6 +158,9 @@ SDL_hidapi.obj: SDL_hidapi.c
wcc386 $(CFLAGS_DLL) -za99 -fo=$^@ $<
!endif
+yuv_rgb_sse.obj: yuv_rgb_sse.c
+ wcc386 $(CFLAGS_DLL) -wcd=202 -fo=$^@ $<
+
$(LIBICONV_LIB): "src/core/os2/iconv2.lbc"
@echo * Creating: $@
wlib -q -b -n -c -pa -s -t -zld -ii -io $@ @$<
diff --git a/Makefile.w32 b/Makefile.w32
index 24efceb02..9567f4abf 100644
--- a/Makefile.w32
+++ b/Makefile.w32
@@ -60,7 +60,7 @@ SRCS+= SDL_events.c SDL_quit.c SDL_keyboard.c SDL_mouse.c SDL_windowevents.c &
SDL_clipboardevents.c SDL_dropevents.c SDL_displayevents.c SDL_gesture.c &
SDL_sensor.c SDL_touch.c
SRCS+= SDL_haptic.c SDL_hidapi.c SDL_gamecontroller.c SDL_joystick.c controller_type.c
-SRCS+= SDL_render.c yuv_rgb.c SDL_yuv.c SDL_yuv_sw.c SDL_blendfillrect.c &
+SRCS+= SDL_render.c yuv_rgb_lsx.c yuv_rgb_sse.c yuv_rgb_std.c SDL_yuv.c SDL_yuv_sw.c SDL_blendfillrect.c &
SDL_blendline.c SDL_blendpoint.c SDL_drawline.c SDL_drawpoint.c &
SDL_render_sw.c SDL_rotate.c SDL_triangle.c
SRCS+= SDL_blit.c SDL_blit_0.c SDL_blit_1.c SDL_blit_A.c SDL_blit_auto.c &
@@ -147,6 +147,9 @@ SDL_RLEaccel.obj: SDL_RLEaccel.c
SDL_malloc.obj: SDL_malloc.c
wcc386 $(CFLAGS_DLL) -wcd=201 -fo=$^@ $<
+yuv_rgb_sse.obj: yuv_rgb_sse.c
+ wcc386 $(CFLAGS_DLL) -wcd=202 -fo=$^@ $<
+
# SDL2libm
MSRCS= e_atan2.c e_exp.c e_fmod.c e_log10.c e_log.c e_pow.c e_rem_pio2.c e_sqrt.c &
k_cos.c k_rem_pio2.c k_sin.c k_tan.c &
diff --git a/VisualC-GDK/SDL/SDL.vcxproj b/VisualC-GDK/SDL/SDL.vcxproj
index 6aee7ccb9..ed071aa9d 100644
--- a/VisualC-GDK/SDL/SDL.vcxproj
+++ b/VisualC-GDK/SDL/SDL.vcxproj
@@ -520,7 +520,13 @@
+
+
+
+
+
+
@@ -755,7 +761,9 @@
-
+
+
+
diff --git a/VisualC-WinRT/SDL-UWP.vcxproj b/VisualC-WinRT/SDL-UWP.vcxproj
index 20d4e6368..696505f35 100644
--- a/VisualC-WinRT/SDL-UWP.vcxproj
+++ b/VisualC-WinRT/SDL-UWP.vcxproj
@@ -185,6 +185,14 @@
+
+
+
+
+
+
+
+
@@ -346,7 +354,9 @@
true
-
+
+
+
{89e9b32e-a86a-47c3-a948-d2b1622925ce}
diff --git a/VisualC/SDL/SDL.vcxproj b/VisualC/SDL/SDL.vcxproj
index 24f332939..d52b5c058 100644
--- a/VisualC/SDL/SDL.vcxproj
+++ b/VisualC/SDL/SDL.vcxproj
@@ -439,7 +439,13 @@
+
+
+
+
+
+
@@ -624,7 +630,9 @@
-
+
+
+
diff --git a/src/video/yuv2rgb/yuv_rgb.h b/src/video/yuv2rgb/yuv_rgb.h
index 5668c0fc4..c3593168f 100644
--- a/src/video/yuv2rgb/yuv_rgb.h
+++ b/src/video/yuv2rgb/yuv_rgb.h
@@ -1,412 +1,33 @@
+#ifndef YUV_RGB_H_
+#define YUV_RGB_H_
+
// Copyright 2016 Adrien Descamps
// Distributed under BSD 3-Clause License
// Provide optimized functions to convert images from 8bits yuv420 to rgb24 format
-// There are a few slightly different variations of the YCbCr color space with different parameters that
+// There are a few slightly different variations of the YCbCr color space with different parameters that
// change the conversion matrix.
// The three most common YCbCr color space, defined by BT.601, BT.709 and JPEG standard are implemented here.
// See the respective standards for details
// The matrix values used are derived from http://www.equasys.de/colorconversion.html
// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a 2 factor
-// For conversion from yuv to rgb, no interpolation is done, and the same UV value are used for 4 rgb pixels. This
+// For conversion from yuv to rgb, no interpolation is done, and the same UV value are used for 4 rgb pixels. This
// is suboptimal for image quality, but by far the fastest method.
// For all methods, width and height should be even, if not, the last row/column of the result image won't be affected.
// For sse methods, if the width if not divisable by 32, the last (width%32) pixels of each line won't be affected.
-#include "SDL_stdinc.h"
/*#include */
-typedef enum
-{
- YCBCR_JPEG,
- YCBCR_601,
- YCBCR_709
-} YCbCrType;
-
// yuv to rgb, standard c implementation
-void yuv420_rgb565_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
+#include "yuv_rgb_std.h"
-void yuv420_rgb24_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
+// yuv to rgb, sse2 implementation
+#include "yuv_rgb_sse.h"
-void yuv420_rgba_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
+// yuv to rgb, lsx implementation
+#include "yuv_rgb_lsx.h"
-void yuv420_bgra_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_argb_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_abgr_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgb565_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgb24_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgba_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_bgra_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_argb_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_abgr_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgb565_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgb24_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgba_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_bgra_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_argb_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_abgr_std(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-// yuv to rgb, sse implementation
-// pointers must be 16 byte aligned, and strides must be divisable by 16
-void yuv420_rgb565_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_rgb24_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_rgba_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_bgra_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_argb_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_abgr_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgb565_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgb24_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgba_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_bgra_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_argb_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_abgr_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgb565_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgb24_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgba_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_bgra_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_argb_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_abgr_sse(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-// yuv to rgb, sse implementation
-// pointers do not need to be 16 byte aligned
-void yuv420_rgb565_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_rgb24_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_rgba_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_bgra_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_argb_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_abgr_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgb565_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgb24_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_rgba_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_bgra_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_argb_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv422_abgr_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgb565_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgb24_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_rgba_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_bgra_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_argb_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuvnv12_abgr_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-
-// rgb to yuv, standard c implementation
-void rgb24_yuv420_std(
- uint32_t width, uint32_t height,
- const uint8_t *rgb, uint32_t rgb_stride,
- uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- YCbCrType yuv_type);
-
-// rgb to yuv, sse implementation
-// pointers must be 16 byte aligned, and strides must be divisible by 16
-void rgb24_yuv420_sse(
- uint32_t width, uint32_t height,
- const uint8_t *rgb, uint32_t rgb_stride,
- uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- YCbCrType yuv_type);
-
-// rgb to yuv, sse implementation
-// pointers do not need to be 16 byte aligned
-void rgb24_yuv420_sseu(
- uint32_t width, uint32_t height,
- const uint8_t *rgb, uint32_t rgb_stride,
- uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- YCbCrType yuv_type);
-
-
-//yuv420 to bgra, lsx implementation
-void yuv420_rgb24_lsx(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_rgba_lsx(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_bgra_lsx(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_argb_lsx(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
-
-void yuv420_abgr_lsx(
- uint32_t width, uint32_t height,
- const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride,
- uint8_t *rgb, uint32_t rgb_stride,
- YCbCrType yuv_type);
+#endif /* YUV_RGB_H_ */
diff --git a/src/video/yuv2rgb/yuv_rgb_common.h b/src/video/yuv2rgb/yuv_rgb_common.h
new file mode 100644
index 000000000..ae787ed5f
--- /dev/null
+++ b/src/video/yuv2rgb/yuv_rgb_common.h
@@ -0,0 +1,13 @@
+#ifndef YUV_RGB_COMMON_H_
+#define YUV_RGB_COMMON_H_
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+
+typedef enum
+{
+ YCBCR_JPEG,
+ YCBCR_601,
+ YCBCR_709
+} YCbCrType;
+
+#endif /* YUV_RGB_COMMON_H_ */
diff --git a/src/video/yuv2rgb/yuv_rgb_internal.h b/src/video/yuv2rgb/yuv_rgb_internal.h
new file mode 100644
index 000000000..cad978b5f
--- /dev/null
+++ b/src/video/yuv2rgb/yuv_rgb_internal.h
@@ -0,0 +1,74 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+
+#define PRECISION 6
+#define PRECISION_FACTOR (1<<PRECISION)
+
+typedef struct
+{
+	uint8_t y_shift;
+	int16_t matrix[3][3];
+} RGB2YUVParam;
+// |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
+// |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
+// |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
+
+typedef struct
+{
+	uint8_t y_shift;
+	int16_t y_factor;
+	int16_t v_r_factor;
+	int16_t u_g_factor;
+	int16_t v_g_factor;
+	int16_t u_b_factor;
+} YUV2RGBParam;
+// |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
+// |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
+// |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
+
+#define V(value) (int16_t)((value*PRECISION_FACTOR)+0.5)
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 26451)
+#endif
+
+// for ITU-T T.871, values can be found in section 4
+// for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255])
+// for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
+// all values are rounded to the fourth decimal
+
+static const YUV2RGBParam YUV2RGB[3] = {
+ // ITU-T T.871 (JPEG)
+ {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
+ // ITU-R BT.601-7
+ {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
+ // ITU-R BT.709-6
+ {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
+};
+
+static const RGB2YUVParam RGB2YUV[3] = {
+ // ITU-T T.871 (JPEG)
+ {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
+ // ITU-R BT.601-7
+ {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
+ // ITU-R BT.709-6
+ {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+/* The various layouts of YUV data we support */
+#define YUV_FORMAT_420 1
+#define YUV_FORMAT_422 2
+#define YUV_FORMAT_NV12 3
+
+/* The various formats of RGB pixel that we support */
+#define RGB_FORMAT_RGB565 1
+#define RGB_FORMAT_RGB24 2
+#define RGB_FORMAT_RGBA 3
+#define RGB_FORMAT_BGRA 4
+#define RGB_FORMAT_ARGB 5
+#define RGB_FORMAT_ABGR 6
diff --git a/src/video/yuv2rgb/yuv_rgb_lsx.c b/src/video/yuv2rgb/yuv_rgb_lsx.c
new file mode 100644
index 000000000..8d84dd142
--- /dev/null
+++ b/src/video/yuv2rgb/yuv_rgb_lsx.c
@@ -0,0 +1,44 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+#include "../../SDL_internal.h"
+
+#if SDL_HAVE_YUV
+#include "yuv_rgb.h"
+
+#include "SDL_cpuinfo.h"
+
+#ifdef __loongarch_sx
+
+#define LSX_FUNCTION_NAME yuv420_rgb24_lsx
+#define STD_FUNCTION_NAME yuv420_rgb24_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_RGB24
+#include "yuv_rgb_lsx_func.h"
+
+#define LSX_FUNCTION_NAME yuv420_rgba_lsx
+#define STD_FUNCTION_NAME yuv420_rgba_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_RGBA
+#include "yuv_rgb_lsx_func.h"
+
+#define LSX_FUNCTION_NAME yuv420_bgra_lsx
+#define STD_FUNCTION_NAME yuv420_bgra_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_BGRA
+#include "yuv_rgb_lsx_func.h"
+
+#define LSX_FUNCTION_NAME yuv420_argb_lsx
+#define STD_FUNCTION_NAME yuv420_argb_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_ARGB
+#include "yuv_rgb_lsx_func.h"
+
+#define LSX_FUNCTION_NAME yuv420_abgr_lsx
+#define STD_FUNCTION_NAME yuv420_abgr_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_ABGR
+#include "yuv_rgb_lsx_func.h"
+
+#endif //__loongarch_sx
+
+#endif /* SDL_HAVE_YUV */
diff --git a/src/video/yuv2rgb/yuv_rgb_lsx.h b/src/video/yuv2rgb/yuv_rgb_lsx.h
new file mode 100644
index 000000000..bcffd95c6
--- /dev/null
+++ b/src/video/yuv2rgb/yuv_rgb_lsx.h
@@ -0,0 +1,407 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+
+// Provide optimized functions to convert images from 8bits yuv420 to rgb24 format
+
+// There are a few slightly different variations of the YCbCr color space with different parameters that
+// change the conversion matrix.
+// The three most common YCbCr color space, defined by BT.601, BT.709 and JPEG standard are implemented here.
+// See the respective standards for details
+// The matrix values used are derived from http://www.equasys.de/colorconversion.html
+
+// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a factor of 2
+// For conversion from yuv to rgb, no interpolation is done, and the same UV values are used for 4 rgb pixels. This
+// is suboptimal for image quality, but by far the fastest method.
+
+// For all methods, width and height should be even, if not, the last row/column of the result image won't be affected.
+// For sse methods, if the width is not divisible by 32, the last (width%32) pixels of each line won't be affected.
+
+/*#include <stdint.h>*/
+#include "yuv_rgb_common.h"
+
+#include "SDL_stdinc.h"
+
+// yuv to rgb, standard c implementation
+void yuv420_rgb565_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgb24_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgba_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_bgra_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_argb_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_abgr_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb565_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb24_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgba_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_bgra_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_argb_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_abgr_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb565_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb24_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgba_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_bgra_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_argb_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_abgr_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+// yuv to rgb, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void yuv420_rgb565_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgb24_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgba_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_bgra_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_argb_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_abgr_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb565_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb24_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgba_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_bgra_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_argb_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_abgr_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb565_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb24_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgba_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_bgra_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_argb_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_abgr_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+// yuv to rgb, sse implementation
+// pointers do not need to be 16 byte aligned
+void yuv420_rgb565_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgb24_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgba_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_bgra_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_argb_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_abgr_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb565_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb24_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgba_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_bgra_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_argb_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_abgr_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb565_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb24_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgba_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_bgra_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_argb_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_abgr_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+
+// rgb to yuv, standard c implementation
+void rgb24_yuv420_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *rgb, uint32_t rgb_stride,
+ uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ YCbCrType yuv_type);
+
+// rgb to yuv, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void rgb24_yuv420_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *rgb, uint32_t rgb_stride,
+ uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ YCbCrType yuv_type);
+
+// rgb to yuv, sse implementation
+// pointers do not need to be 16 byte aligned
+void rgb24_yuv420_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *rgb, uint32_t rgb_stride,
+ uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ YCbCrType yuv_type);
+
+
+// yuv420 to rgb, lsx implementation; planes are declared in y, u, v order like the std/sse prototypes (NOTE(review): confirm the lsx definitions use the same order)
+void yuv420_rgb24_lsx(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgba_lsx(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_bgra_lsx(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_argb_lsx(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_abgr_lsx(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
diff --git a/src/video/yuv2rgb/yuv_rgb.c b/src/video/yuv2rgb/yuv_rgb_sse.c
similarity index 53%
rename from src/video/yuv2rgb/yuv_rgb.c
rename to src/video/yuv2rgb/yuv_rgb_sse.c
index 71bcde9c6..b22a89fa4 100644
--- a/src/video/yuv2rgb/yuv_rgb.c
+++ b/src/video/yuv2rgb/yuv_rgb_sse.c
@@ -3,254 +3,12 @@
#include "../../SDL_internal.h"
#if SDL_HAVE_YUV
-
#include "yuv_rgb.h"
+#include "yuv_rgb_internal.h"
#include "SDL_cpuinfo.h"
/*#include */
-#define PRECISION 6
-#define PRECISION_FACTOR (1<[0-255])
-// for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
-// all values are rounded to the fourth decimal
-
-static const YUV2RGBParam YUV2RGB[3] = {
- // ITU-T T.871 (JPEG)
- {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
- // ITU-R BT.601-7
- {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
- // ITU-R BT.709-6
- {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
-};
-
-static const RGB2YUVParam RGB2YUV[3] = {
- // ITU-T T.871 (JPEG)
- {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
- // ITU-R BT.601-7
- {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
- // ITU-R BT.709-6
- {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
-};
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-/* The various layouts of YUV data we support */
-#define YUV_FORMAT_420 1
-#define YUV_FORMAT_422 2
-#define YUV_FORMAT_NV12 3
-
-/* The various formats of RGB pixel that we support */
-#define RGB_FORMAT_RGB565 1
-#define RGB_FORMAT_RGB24 2
-#define RGB_FORMAT_RGBA 3
-#define RGB_FORMAT_BGRA 4
-#define RGB_FORMAT_ARGB 5
-#define RGB_FORMAT_ABGR 6
-
-// divide by PRECISION_FACTOR and clamp to [0:255] interval
-// input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range
-static uint8_t clampU8(int32_t v)
-{
- static const uint8_t lut[512] =
- {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
- 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
- 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
- 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
- 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
- 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
- 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
- 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
- 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
- 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
- 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
- };
- return lut[((v+128*PRECISION_FACTOR)>>PRECISION)&511];
-}
-
-
-#define STD_FUNCTION_NAME yuv420_rgb565_std
-#define YUV_FORMAT YUV_FORMAT_420
-#define RGB_FORMAT RGB_FORMAT_RGB565
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv420_rgb24_std
-#define YUV_FORMAT YUV_FORMAT_420
-#define RGB_FORMAT RGB_FORMAT_RGB24
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv420_rgba_std
-#define YUV_FORMAT YUV_FORMAT_420
-#define RGB_FORMAT RGB_FORMAT_RGBA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv420_bgra_std
-#define YUV_FORMAT YUV_FORMAT_420
-#define RGB_FORMAT RGB_FORMAT_BGRA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv420_argb_std
-#define YUV_FORMAT YUV_FORMAT_420
-#define RGB_FORMAT RGB_FORMAT_ARGB
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv420_abgr_std
-#define YUV_FORMAT YUV_FORMAT_420
-#define RGB_FORMAT RGB_FORMAT_ABGR
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv422_rgb565_std
-#define YUV_FORMAT YUV_FORMAT_422
-#define RGB_FORMAT RGB_FORMAT_RGB565
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv422_rgb24_std
-#define YUV_FORMAT YUV_FORMAT_422
-#define RGB_FORMAT RGB_FORMAT_RGB24
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv422_rgba_std
-#define YUV_FORMAT YUV_FORMAT_422
-#define RGB_FORMAT RGB_FORMAT_RGBA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv422_bgra_std
-#define YUV_FORMAT YUV_FORMAT_422
-#define RGB_FORMAT RGB_FORMAT_BGRA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv422_argb_std
-#define YUV_FORMAT YUV_FORMAT_422
-#define RGB_FORMAT RGB_FORMAT_ARGB
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuv422_abgr_std
-#define YUV_FORMAT YUV_FORMAT_422
-#define RGB_FORMAT RGB_FORMAT_ABGR
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuvnv12_rgb565_std
-#define YUV_FORMAT YUV_FORMAT_NV12
-#define RGB_FORMAT RGB_FORMAT_RGB565
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuvnv12_rgb24_std
-#define YUV_FORMAT YUV_FORMAT_NV12
-#define RGB_FORMAT RGB_FORMAT_RGB24
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuvnv12_rgba_std
-#define YUV_FORMAT YUV_FORMAT_NV12
-#define RGB_FORMAT RGB_FORMAT_RGBA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuvnv12_bgra_std
-#define YUV_FORMAT YUV_FORMAT_NV12
-#define RGB_FORMAT RGB_FORMAT_BGRA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuvnv12_argb_std
-#define YUV_FORMAT YUV_FORMAT_NV12
-#define RGB_FORMAT RGB_FORMAT_ARGB
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME yuvnv12_abgr_std
-#define YUV_FORMAT YUV_FORMAT_NV12
-#define RGB_FORMAT RGB_FORMAT_ABGR
-#include "yuv_rgb_std_func.h"
-
-void rgb24_yuv420_std(
- uint32_t width, uint32_t height,
- const uint8_t *RGB, uint32_t RGB_stride,
- uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
- YCbCrType yuv_type)
-{
- const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
-
- uint32_t x, y;
- for(y=0; y<(height-1); y+=2)
- {
- const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
- *rgb_ptr2=RGB+(y+1)*RGB_stride;
-
- uint8_t *y_ptr1=Y+y*Y_stride,
- *y_ptr2=Y+(y+1)*Y_stride,
- *u_ptr=U+(y/2)*UV_stride,
- *v_ptr=V+(y/2)*UV_stride;
-
- for(x=0; x<(width-1); x+=2)
- {
- // compute yuv for the four pixels, u and v values are summed
- int32_t y_tmp, u_tmp, v_tmp;
-
- y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
- u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
- v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
- y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
- u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
- v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
- y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
- u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
- v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
- y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
- u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
- v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
- y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<matrix[0][0])), \
- _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \
+ _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \
Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \
Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<matrix[1][0])), \
- _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \
+ _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \
U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \
U = _mm_add_epi16(U, _mm_set1_epi16(128<matrix[2][0])), \
- _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \
+ _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \
V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \
V = _mm_add_epi16(V, _mm_set1_epi16(128<*/
+#include "yuv_rgb_common.h"
+
+#include "SDL_stdinc.h"
+
+// yuv to rgb, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void yuv420_rgb565_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgb24_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgba_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_bgra_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_argb_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_abgr_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb565_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb24_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgba_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_bgra_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_argb_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_abgr_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb565_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb24_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgba_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_bgra_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_argb_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_abgr_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+// yuv to rgb, sse implementation
+// pointers do not need to be 16 byte aligned
+void yuv420_rgb565_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgb24_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgba_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_bgra_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_argb_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_abgr_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb565_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb24_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgba_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_bgra_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_argb_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_abgr_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb565_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb24_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgba_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_bgra_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_argb_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_abgr_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+
+// rgb to yuv, standard c implementation
+void rgb24_yuv420_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *rgb, uint32_t rgb_stride,
+ uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ YCbCrType yuv_type);
+
+// rgb to yuv, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void rgb24_yuv420_sse(
+ uint32_t width, uint32_t height,
+ const uint8_t *rgb, uint32_t rgb_stride,
+ uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ YCbCrType yuv_type);
+
+// rgb to yuv, sse implementation
+// pointers do not need to be 16 byte aligned
+void rgb24_yuv420_sseu(
+ uint32_t width, uint32_t height,
+ const uint8_t *rgb, uint32_t rgb_stride,
+ uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ YCbCrType yuv_type);
diff --git a/src/video/yuv2rgb/yuv_rgb_std.c b/src/video/yuv2rgb/yuv_rgb_std.c
new file mode 100644
index 000000000..a222a3abb
--- /dev/null
+++ b/src/video/yuv2rgb/yuv_rgb_std.c
@@ -0,0 +1,179 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+#include "../../SDL_internal.h"
+
+#if SDL_HAVE_YUV
+#include "yuv_rgb.h"
+#include "yuv_rgb_internal.h"
+
+// divide by PRECISION_FACTOR (via >>PRECISION) and clamp to the [0:255] interval with a 512-entry lookup table
+// input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range; the index is masked with 511, so input outside that range wraps around the table instead of saturating
+static uint8_t clampU8(int32_t v)
+{
+ static const uint8_t lut[512] = // leading zeros for negative quotients, then the 0..255 ramp, then 255 for overflow
+ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
+ 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
+ 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
+ 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
+ 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
+ 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
+ };
+ return lut[((v+128*PRECISION_FACTOR)>>PRECISION)&511]; // bias by 128*PRECISION_FACTOR before shifting so the in-range minimum maps to index 0
+}
+
+
+#define STD_FUNCTION_NAME yuv420_rgb565_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_RGB565
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv420_rgb24_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_RGB24
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv420_rgba_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_RGBA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv420_bgra_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_BGRA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv420_argb_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_ARGB
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv420_abgr_std
+#define YUV_FORMAT YUV_FORMAT_420
+#define RGB_FORMAT RGB_FORMAT_ABGR
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv422_rgb565_std
+#define YUV_FORMAT YUV_FORMAT_422
+#define RGB_FORMAT RGB_FORMAT_RGB565
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv422_rgb24_std
+#define YUV_FORMAT YUV_FORMAT_422
+#define RGB_FORMAT RGB_FORMAT_RGB24
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv422_rgba_std
+#define YUV_FORMAT YUV_FORMAT_422
+#define RGB_FORMAT RGB_FORMAT_RGBA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv422_bgra_std
+#define YUV_FORMAT YUV_FORMAT_422
+#define RGB_FORMAT RGB_FORMAT_BGRA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv422_argb_std
+#define YUV_FORMAT YUV_FORMAT_422
+#define RGB_FORMAT RGB_FORMAT_ARGB
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuv422_abgr_std
+#define YUV_FORMAT YUV_FORMAT_422
+#define RGB_FORMAT RGB_FORMAT_ABGR
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuvnv12_rgb565_std
+#define YUV_FORMAT YUV_FORMAT_NV12
+#define RGB_FORMAT RGB_FORMAT_RGB565
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuvnv12_rgb24_std
+#define YUV_FORMAT YUV_FORMAT_NV12
+#define RGB_FORMAT RGB_FORMAT_RGB24
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuvnv12_rgba_std
+#define YUV_FORMAT YUV_FORMAT_NV12
+#define RGB_FORMAT RGB_FORMAT_RGBA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuvnv12_bgra_std
+#define YUV_FORMAT YUV_FORMAT_NV12
+#define RGB_FORMAT RGB_FORMAT_BGRA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuvnv12_argb_std
+#define YUV_FORMAT YUV_FORMAT_NV12
+#define RGB_FORMAT RGB_FORMAT_ARGB
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME yuvnv12_abgr_std
+#define YUV_FORMAT YUV_FORMAT_NV12
+#define RGB_FORMAT RGB_FORMAT_ABGR
+#include "yuv_rgb_std_func.h"
+
+void rgb24_yuv420_std(
+ uint32_t width, uint32_t height,
+ const uint8_t *RGB, uint32_t RGB_stride,
+ uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
+ YCbCrType yuv_type)
+{
+ const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
+
+ uint32_t x, y;
+ for(y=0; y<(height-1); y+=2)
+ {
+ const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
+ *rgb_ptr2=RGB+(y+1)*RGB_stride;
+
+ uint8_t *y_ptr1=Y+y*Y_stride,
+ *y_ptr2=Y+(y+1)*Y_stride,
+ *u_ptr=U+(y/2)*UV_stride,
+ *v_ptr=V+(y/2)*UV_stride;
+
+ for(x=0; x<(width-1); x+=2)
+ {
+ // compute yuv for the four pixels, u and v values are summed
+ int32_t y_tmp, u_tmp, v_tmp;
+
+ y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
+ u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
+ v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
+ y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
+ u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
+ v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
+ y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
+ u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
+ v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
+ y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
+ u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
+ v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
+ y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<*/
+#include "yuv_rgb_common.h"
+
+#include "SDL_stdinc.h"
+
+// yuv to rgb, standard C implementation -- prototypes only; all return void and share one parameter list
+void yuv420_rgb565_std( // yuv420_* group: source/destination layouts are inferred from the names only (here "rgb565") -- TODO confirm against the implementation
+ uint32_t width, uint32_t height, // image size; units are not stated here (presumably pixels)
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, // three read-only byte inputs; y has its own stride, u and v get a single uv_stride (presumably bytes per row -- verify)
+ uint8_t *rgb, uint32_t rgb_stride, // writable destination buffer and its stride
+ YCbCrType yuv_type); // YCbCrType is not declared in this chunk (presumably comes from yuv_rgb_common.h)
+
+void yuv420_rgb24_std( // same parameters as yuv420_rgb565_std; "rgb24" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_rgba_std( // same parameters as yuv420_rgb565_std; "rgba" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_bgra_std( // same parameters as yuv420_rgb565_std; "bgra" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_argb_std( // same parameters as yuv420_rgb565_std; "argb" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv420_abgr_std( // same parameters as yuv420_rgb565_std; "abgr" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgb565_std( // yuv422_* group of void prototypes: source/destination layouts are inferred from the names only (here "rgb565") -- TODO confirm against the implementation
+ uint32_t width, uint32_t height, // image size; units are not stated here (presumably pixels)
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, // three read-only byte inputs; y has its own stride, u and v get a single uv_stride (presumably bytes per row -- verify)
+ uint8_t *rgb, uint32_t rgb_stride, // writable destination buffer and its stride
+ YCbCrType yuv_type); // YCbCrType is not declared in this chunk (presumably comes from yuv_rgb_common.h)
+
+void yuv422_rgb24_std( // same parameters as yuv422_rgb565_std; "rgb24" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_rgba_std( // same parameters as yuv422_rgb565_std; "rgba" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_bgra_std( // same parameters as yuv422_rgb565_std; "bgra" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_argb_std( // same parameters as yuv422_rgb565_std; "argb" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuv422_abgr_std( // same parameters as yuv422_rgb565_std; "abgr" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgb565_std( // yuvnv12_* group of void prototypes: layouts are inferred from the names only (here "rgb565") -- TODO confirm; NOTE(review): how the separate u and v pointers map onto NV12 data is not visible here
+ uint32_t width, uint32_t height, // image size; units are not stated here (presumably pixels)
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, // three read-only byte inputs; y has its own stride, u and v get a single uv_stride (presumably bytes per row -- verify)
+ uint8_t *rgb, uint32_t rgb_stride, // writable destination buffer and its stride
+ YCbCrType yuv_type); // YCbCrType is not declared in this chunk (presumably comes from yuv_rgb_common.h)
+
+void yuvnv12_rgb24_std( // same parameters as yuvnv12_rgb565_std; "rgb24" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_rgba_std( // same parameters as yuvnv12_rgb565_std; "rgba" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_bgra_std( // same parameters as yuvnv12_rgb565_std; "bgra" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_argb_std( // same parameters as yuvnv12_rgb565_std; "argb" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);
+
+void yuvnv12_abgr_std( // same parameters as yuvnv12_rgb565_std; "abgr" destination per the name
+ uint32_t width, uint32_t height,
+ const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+ uint8_t *rgb, uint32_t rgb_stride,
+ YCbCrType yuv_type);