mirror of
https://github.com/Ryujinx/SDL.git
synced 2024-12-23 18:55:32 +00:00
Fixed bug #4534: NEON implementation of Convert51ToStereo (Thanks Ryan!)
This commit is contained in:
parent
5e89b3c89e
commit
072e3fdfc4
|
@ -35,6 +35,10 @@
|
||||||
|
|
||||||
#define DEBUG_AUDIOSTREAM 0
|
#define DEBUG_AUDIOSTREAM 0
|
||||||
|
|
||||||
|
#ifdef __ARM_NEON
|
||||||
|
#define HAVE_NEON_INTRINSICS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __SSE__
|
#ifdef __SSE__
|
||||||
#define HAVE_SSE_INTRINSICS 1
|
#define HAVE_SSE_INTRINSICS 1
|
||||||
#endif
|
#endif
|
||||||
|
@ -240,6 +244,66 @@ SDL_Convert51ToStereo_SSE(SDL_AudioCVT * cvt, SDL_AudioFormat format)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_NEON_INTRINSICS
|
||||||
|
/* Convert from 5.1 to stereo. Average left and right, distribute center, discard LFE. */
|
||||||
|
static void SDLCALL
|
||||||
|
SDL_Convert51ToStereo_NEON(SDL_AudioCVT * cvt, SDL_AudioFormat format)
|
||||||
|
{
|
||||||
|
float *dst = (float *) cvt->buf;
|
||||||
|
const float *src = dst;
|
||||||
|
int i = cvt->len_cvt / (sizeof (float) * 6);
|
||||||
|
const float two_fifths_f = 1.0f / 2.5f;
|
||||||
|
const float32x4_t two_fifths_v = vdupq_n_f32(two_fifths_f);
|
||||||
|
const float32x4_t half = vdupq_n_f32(0.5f);
|
||||||
|
|
||||||
|
LOG_DEBUG_CONVERT("5.1", "stereo (using NEON)");
|
||||||
|
SDL_assert(format == AUDIO_F32SYS);
|
||||||
|
|
||||||
|
/* SDL's 5.1 layout: FL+FR+FC+LFE+BL+BR */
|
||||||
|
|
||||||
|
/* Just use unaligned load/stores, it's the same NEON instructions and
|
||||||
|
hopefully even unaligned NEON is faster than the scalar fallback. */
|
||||||
|
while (i >= 2) {
|
||||||
|
/* Two 5.1 samples (12 floats) fit nicely in three 128bit */
|
||||||
|
/* registers. Using shuffles they can be rearranged so that */
|
||||||
|
/* the conversion math can be vectorized. */
|
||||||
|
const float32x4_t in0 = vld1q_f32(src); /* 0FL 0FR 0FC 0LF */
|
||||||
|
const float32x4_t in1 = vld1q_f32(src + 4); /* 0BL 0BR 1FL 1FR */
|
||||||
|
const float32x4_t in2 = vld1q_f32(src + 8); /* 1FC 1LF 1BL 1BR */
|
||||||
|
|
||||||
|
/* 0FC 0FC 1FC 1FC */
|
||||||
|
const float32x4_t fc_distributed = vmulq_f32(half, vcombine_f32(vdup_lane_f32(vget_high_f32(in0), 0), vdup_lane_f32(vget_low_f32(in2), 0)));
|
||||||
|
|
||||||
|
/* 0FL 0FR 1BL 1BR */
|
||||||
|
const float32x4_t blended = vcombine_f32(vget_low_f32(in0), vget_high_f32(in2));
|
||||||
|
|
||||||
|
/* 0FL 0FR 1BL 1BR */
|
||||||
|
/* + 0BL 0BR 1FL 1FR */
|
||||||
|
/* = 0L 0R 1L 1R */
|
||||||
|
float32x4_t out = vaddq_f32(blended, in1);
|
||||||
|
out = vaddq_f32(out, fc_distributed);
|
||||||
|
out = vmulq_f32(out, two_fifths_v);
|
||||||
|
|
||||||
|
vst1q_f32(dst, out);
|
||||||
|
|
||||||
|
i -= 2; src += 12; dst += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finish off any leftovers with scalar operations. */
|
||||||
|
while (i) {
|
||||||
|
const float front_center_distributed = src[2] * 0.5f;
|
||||||
|
dst[0] = (src[0] + front_center_distributed + src[4]) * two_fifths_f; /* left */
|
||||||
|
dst[1] = (src[1] + front_center_distributed + src[5]) * two_fifths_f; /* right */
|
||||||
|
i--; src += 6; dst+=2;
|
||||||
|
}
|
||||||
|
|
||||||
|
cvt->len_cvt /= 3;
|
||||||
|
if (cvt->filters[++cvt->filter_index]) {
|
||||||
|
cvt->filters[cvt->filter_index] (cvt, format);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Convert from 5.1 to stereo. Average left and right, distribute center, discard LFE. */
|
/* Convert from 5.1 to stereo. Average left and right, distribute center, discard LFE. */
|
||||||
static void SDLCALL
|
static void SDLCALL
|
||||||
SDL_Convert51ToStereo(SDL_AudioCVT * cvt, SDL_AudioFormat format)
|
SDL_Convert51ToStereo(SDL_AudioCVT * cvt, SDL_AudioFormat format)
|
||||||
|
@ -1177,6 +1241,12 @@ SDL_BuildAudioCVT(SDL_AudioCVT * cvt,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_NEON_INTRINSICS
|
||||||
|
if (!filter && SDL_HasNEON()) {
|
||||||
|
filter = SDL_Convert51ToStereo_NEON;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!filter) {
|
if (!filter) {
|
||||||
filter = SDL_Convert51ToStereo;
|
filter = SDL_Convert51ToStereo;
|
||||||
}
|
}
|
||||||
|
@ -1231,7 +1301,7 @@ SDL_BuildAudioCVT(SDL_AudioCVT * cvt,
|
||||||
handled by now, but let's be defensive */
|
handled by now, but let's be defensive */
|
||||||
return SDL_SetError("Invalid channel combination");
|
return SDL_SetError("Invalid channel combination");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Do rate conversion, if necessary. Updates (cvt). */
|
/* Do rate conversion, if necessary. Updates (cvt). */
|
||||||
if (SDL_BuildAudioResampleCVT(cvt, dst_channels, src_rate, dst_rate) < 0) {
|
if (SDL_BuildAudioResampleCVT(cvt, dst_channels, src_rate, dst_rate) < 0) {
|
||||||
return -1; /* shouldn't happen, but just in case... */
|
return -1; /* shouldn't happen, but just in case... */
|
||||||
|
@ -1713,7 +1783,7 @@ SDL_AudioStreamPut(SDL_AudioStream *stream, const void *buf, int len)
|
||||||
stream->staging_buffer_filled += len;
|
stream->staging_buffer_filled += len;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fill the staging buffer, process it, and continue */
|
/* Fill the staging buffer, process it, and continue */
|
||||||
amount = (stream->staging_buffer_size - stream->staging_buffer_filled);
|
amount = (stream->staging_buffer_size - stream->staging_buffer_filled);
|
||||||
SDL_assert(amount > 0);
|
SDL_assert(amount > 0);
|
||||||
|
|
Loading…
Reference in a new issue