From fea49a3e2f9240364b973d18dbb9206e6240a67c Mon Sep 17 00:00:00 2001 From: Sylvain Becker Date: Sun, 3 Jan 2021 15:24:47 +0100 Subject: [PATCH] SDL_stretch.c: compilation NEON on Windows 10 --- src/video/SDL_stretch.c | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/video/SDL_stretch.c b/src/video/SDL_stretch.c index d8bb46e0c..28dd05e7d 100644 --- a/src/video/SDL_stretch.c +++ b/src/video/SDL_stretch.c @@ -610,12 +610,16 @@ scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch, #if defined(__ARM_NEON) # define HAVE_NEON_INTRINSICS 1 +# define CAST_uint8x8_t (uint8x8_t) +# define CAST_uint32x2_t (uint32x2_t) #endif -/* TODO: this didn't compile on Window10 universal package last time I tried .. */ #if defined(__WINRT__) || defined(_MSC_VER) # if defined(HAVE_NEON_INTRINSICS) -# undef HAVE_NEON_INTRINSICS +# undef CAST_uint8x8_t +# undef CAST_uint32x2_t +# define CAST_uint8x8_t +# define CAST_uint32x2_t # endif #endif @@ -824,8 +828,8 @@ INTERPOL_BILINEAR_NEON(const Uint32 *s0, const Uint32 *s1, int frac_w, uint8x8_t uint16x8_t d0; uint8x8_t e0; - x_00_01 = (uint8x8_t)vld1_u32(s0); /* Load 2 pixels */ - x_10_11 = (uint8x8_t)vld1_u32(s1); + x_00_01 = CAST_uint8x8_t vld1_u32(s0); /* Load 2 pixels */ + x_10_11 = CAST_uint8x8_t vld1_u32(s1); /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ @@ -846,7 +850,7 @@ INTERPOL_BILINEAR_NEON(const Uint32 *s0, const Uint32 *s1, int frac_w, uint8x8_t e0 = vmovn_u16(d0); /* Store 1 pixel */ - *dst = vget_lane_u32((uint32x2_t)e0, 0); + *dst = vget_lane_u32(CAST_uint32x2_t e0, 0); } static int @@ -911,14 +915,14 @@ scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *d s_16_17 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_3); /* Interpolation vertical */ - x_00_01 = (uint8x8_t)vld1_u32(s_00_01); /* Load 2 pixels */ - x_02_03 = (uint8x8_t)vld1_u32(s_02_03); - x_04_05 = (uint8x8_t)vld1_u32(s_04_05); - x_06_07 = (uint8x8_t)vld1_u32(s_06_07); - x_10_11 = (uint8x8_t)vld1_u32(s_10_11); - x_12_13 = (uint8x8_t)vld1_u32(s_12_13); - x_14_15 = (uint8x8_t)vld1_u32(s_14_15); - x_16_17 = (uint8x8_t)vld1_u32(s_16_17); + x_00_01 = CAST_uint8x8_t vld1_u32(s_00_01); /* Load 2 pixels */ + x_02_03 = CAST_uint8x8_t vld1_u32(s_02_03); + x_04_05 = CAST_uint8x8_t vld1_u32(s_04_05); + x_06_07 = CAST_uint8x8_t vld1_u32(s_06_07); + x_10_11 = CAST_uint8x8_t vld1_u32(s_10_11); + x_12_13 = CAST_uint8x8_t vld1_u32(s_12_13); + x_14_15 = CAST_uint8x8_t vld1_u32(s_14_15); + x_16_17 = CAST_uint8x8_t vld1_u32(s_16_17); /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ @@ -970,7 +974,7 @@ scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *d /* Narrow again */ e1 = vmovn_u16(d1); - f0 = vcombine_u32((uint32x2_t)e0, (uint32x2_t)e1); + f0 = vcombine_u32(CAST_uint32x2_t e0, CAST_uint32x2_t e1); /* Store 4 pixels */ vst1q_u32(dst, f0); @@ -1009,10 +1013,10 @@ scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *d s_12_13 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_1); /* Interpolation vertical */ - x_00_01 = (uint8x8_t)vld1_u32(s_00_01);/* Load 2 pixels */ - x_02_03 = (uint8x8_t)vld1_u32(s_02_03); - x_10_11 = (uint8x8_t)vld1_u32(s_10_11); - x_12_13 = (uint8x8_t)vld1_u32(s_12_13); + x_00_01 = CAST_uint8x8_t vld1_u32(s_00_01);/* Load 2 pixels */ + x_02_03 = CAST_uint8x8_t vld1_u32(s_02_03); + x_10_11 = CAST_uint8x8_t vld1_u32(s_10_11); + x_12_13 = CAST_uint8x8_t vld1_u32(s_12_13); /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ @@ -1043,7 +1047,7 @@ scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *d e0 = vmovn_u16(d0); /* Store 2 pixels */ - vst1_u32(dst, (uint32x2_t)e0); + vst1_u32(dst, CAST_uint32x2_t e0); dst += 2; }