mirror of
https://github.com/Ryujinx/SDL.git
synced 2024-12-23 09:35:30 +00:00
audio: Resampler optimizations.
- Calculate `j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING` once per loop iteration since we use it multiple times.
- Do the left-wing loop in two sections: while `srcframe < 0` and then the remaining calculations when `srcframe >= 0`. This bubbles a conditional out of every iteration of a tight loop, giving us a boost. We could _probably_ do this to the right-wing loop too, but it's less straightforward there.
- The real win: Use floats instead of doubles. This almost doubles the speed of the entire function on Intel CPUs, and for embedded things without hardware-level support for doubles, the speedup is enormous. This in theory might reduce audio quality, though, and I had to put a check in place to avoid a division-by-zero that we avoided at higher precision, but this is likely to be worth keeping for at least the Sony PSP and other smaller platforms, if not everyone.
This commit is contained in:
parent
de019568dc
commit
111c3add73
|
@ -725,46 +725,64 @@ SDL_ResampleAudio(const int chans, const int inrate, const int outrate,
|
||||||
const float *inbuf, const int inbuflen,
|
const float *inbuf, const int inbuflen,
|
||||||
float *outbuf, const int outbuflen)
|
float *outbuf, const int outbuflen)
|
||||||
{
|
{
|
||||||
const double finrate = (double) inrate;
|
/* Note that this used to be double, but it looks like we can get by with float in most cases at
|
||||||
const double outtimeincr = 1.0 / ((float) outrate);
|
almost twice the speed on Intel processors, and orders of magnitude more
|
||||||
const double ratio = ((float) outrate) / ((float) inrate);
|
on CPUs that need a software fallback for double calculations. */
|
||||||
|
typedef float ResampleFloatType;
|
||||||
|
|
||||||
|
const ResampleFloatType finrate = (ResampleFloatType) inrate;
|
||||||
|
const ResampleFloatType outtimeincr = ((ResampleFloatType) 1.0f) / ((ResampleFloatType) outrate);
|
||||||
|
const ResampleFloatType ratio = ((float) outrate) / ((float) inrate);
|
||||||
const int paddinglen = ResamplerPadding(inrate, outrate);
|
const int paddinglen = ResamplerPadding(inrate, outrate);
|
||||||
const int framelen = chans * (int)sizeof (float);
|
const int framelen = chans * (int)sizeof (float);
|
||||||
const int inframes = inbuflen / framelen;
|
const int inframes = inbuflen / framelen;
|
||||||
const int wantedoutframes = (int) ((inbuflen / framelen) * ratio); /* outbuflen isn't total to write, it's total available. */
|
const int wantedoutframes = (int) ((inbuflen / framelen) * ratio); /* outbuflen isn't total to write, it's total available. */
|
||||||
const int maxoutframes = outbuflen / framelen;
|
const int maxoutframes = outbuflen / framelen;
|
||||||
const int outframes = SDL_min(wantedoutframes, maxoutframes);
|
const int outframes = SDL_min(wantedoutframes, maxoutframes);
|
||||||
|
ResampleFloatType outtime = 0.0f;
|
||||||
float *dst = outbuf;
|
float *dst = outbuf;
|
||||||
double outtime = 0.0;
|
|
||||||
int i, j, chan;
|
int i, j, chan;
|
||||||
|
|
||||||
for (i = 0; i < outframes; i++) {
|
for (i = 0; i < outframes; i++) {
|
||||||
const int srcindex = (int) (outtime * inrate);
|
const int srcindex = (int) (outtime * inrate);
|
||||||
const double intime = ((double) srcindex) / finrate;
|
const ResampleFloatType intime = ((ResampleFloatType) srcindex) / finrate;
|
||||||
const double innexttime = ((double) (srcindex + 1)) / finrate;
|
const ResampleFloatType innexttime = ((ResampleFloatType) (srcindex + 1)) / finrate;
|
||||||
const double interpolation1 = 1.0 - ((innexttime - outtime) / (innexttime - intime));
|
const ResampleFloatType indeltatime = innexttime - intime;
|
||||||
|
const ResampleFloatType interpolation1 = (indeltatime == 0.0f) ? 1.0f : (1.0f - ((innexttime - outtime) / indeltatime));
|
||||||
const int filterindex1 = (int) (interpolation1 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
|
const int filterindex1 = (int) (interpolation1 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
|
||||||
const double interpolation2 = 1.0 - interpolation1;
|
const ResampleFloatType interpolation2 = 1.0f - interpolation1;
|
||||||
const int filterindex2 = (int) (interpolation2 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
|
const int filterindex2 = (int) (interpolation2 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
|
||||||
|
|
||||||
for (chan = 0; chan < chans; chan++) {
|
for (chan = 0; chan < chans; chan++) {
|
||||||
float outsample = 0.0f;
|
float outsample = 0.0f;
|
||||||
|
|
||||||
/* do this twice to calculate the sample, once for the "left wing" and then same for the right. */
|
/* do this twice to calculate the sample, once for the "left wing" and then same for the right. */
|
||||||
/* !!! FIXME: do both wings in one loop */
|
|
||||||
for (j = 0; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
|
/* Left wing! split the "srcframe < 0" condition out into a preloop. */
|
||||||
|
for (j = 0; srcindex < j; j++) {
|
||||||
|
const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
|
||||||
const int srcframe = srcindex - j;
|
const int srcframe = srcindex - j;
|
||||||
/* !!! FIXME: we can bubble this conditional out of here by doing a pre loop. */
|
const float insample = lpadding[((paddinglen + srcframe) * chans) + chan];
|
||||||
const float insample = (srcframe < 0) ? lpadding[((paddinglen + srcframe) * chans) + chan] : inbuf[(srcframe * chans) + chan];
|
outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples])));
|
||||||
outsample += (float)(insample * (ResamplerFilter[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation1 * ResamplerFilterDifference[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
|
}
|
||||||
|
|
||||||
|
/* Finish the left wing now that srcframe >= 0 */
|
||||||
|
for (; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
|
||||||
|
const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
|
||||||
|
const int srcframe = srcindex - j;
|
||||||
|
const float insample = inbuf[(srcframe * chans) + chan];
|
||||||
|
outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples])));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Do the right wing! */
|
||||||
for (j = 0; (filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
|
for (j = 0; (filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
|
||||||
|
const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
|
||||||
const int srcframe = srcindex + 1 + j;
|
const int srcframe = srcindex + 1 + j;
|
||||||
/* !!! FIXME: we can bubble this conditional out of here by doing a post loop. */
|
/* !!! FIXME: we can bubble this conditional out of here by doing a post loop. */
|
||||||
const float insample = (srcframe >= inframes) ? rpadding[((srcframe - inframes) * chans) + chan] : inbuf[(srcframe * chans) + chan];
|
const float insample = (srcframe >= inframes) ? rpadding[((srcframe - inframes) * chans) + chan] : inbuf[(srcframe * chans) + chan];
|
||||||
outsample += (float)(insample * (ResamplerFilter[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation2 * ResamplerFilterDifference[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
|
outsample += (float)(insample * (ResamplerFilter[filterindex2 + jsamples] + (interpolation2 * ResamplerFilterDifference[filterindex2 + jsamples])));
|
||||||
}
|
}
|
||||||
|
|
||||||
*(dst++) = outsample;
|
*(dst++) = outsample;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue