audio: Resampler optimizations.

- Calculate `j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING` once per loop iteration since we use it multiple times. - Do the left-wing loop in two sections: while `srcframe < 0` and then the remaining calculations when `srcframe >= 0`. This bubbles a conditional out of every iteration of a tight loop, giving us a boost. We could _probably_ do this to the right-wing loop too, but it's less straightforward there. - The real win: Use floats instead of doubles. This almost doubles the speed of the entire function on Intel CPUs, and for embedded things without hardware-level support for doubles, the speedup is enormous. This in theory might reduce audio quality, though, and I had to put a check in place to avoid a division-by-zero that we avoided at higher precision, but this is likely to be worth keeping for at least the Sony PSP and other smaller platforms, if not everyone.
2025-11-21 12:54:53 +00:00 · 2022-04-10 13:23:51 -04:00 · 2022-04-10 13:23:51 -04:00 · 111c3add73
parent de019568dc
commit 111c3add73
1 changed files with 32 additions and 14 deletions
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@ -725,46 +725,64 @@ SDL_ResampleAudio(const int chans, const int inrate, const int outrate,
                        const float *inbuf, const int inbuflen,
                        float *outbuf, const int outbuflen)
 {
-    const double finrate = (double) inrate;
+    /* Note that this used to be double, but it looks like we can get by with float in most cases at
-    const double outtimeincr = 1.0 / ((float) outrate);
+       almost twice the speed on Intel processors, and orders of magnitude more
-    const double  ratio = ((float) outrate) / ((float) inrate);
+       on CPUs that need a software fallback for double calculations. */
    typedef float ResampleFloatType;
    const ResampleFloatType finrate = (ResampleFloatType) inrate;
    const ResampleFloatType outtimeincr = ((ResampleFloatType) 1.0f) / ((ResampleFloatType) outrate);
    const ResampleFloatType ratio = ((float) outrate) / ((float) inrate);
    const int paddinglen = ResamplerPadding(inrate, outrate);
    const int framelen = chans * (int)sizeof (float);
    const int inframes = inbuflen / framelen;
    const int wantedoutframes = (int) ((inbuflen / framelen) * ratio);  /* outbuflen isn't total to write, it's total available. */
    const int maxoutframes = outbuflen / framelen;
    const int outframes = SDL_min(wantedoutframes, maxoutframes);
    ResampleFloatType outtime = 0.0f;
    float *dst = outbuf;
    double outtime = 0.0;
    int i, j, chan;
    for (i = 0; i < outframes; i++) {
        const int srcindex = (int) (outtime * inrate);
-        const double intime = ((double) srcindex) / finrate;
+        const ResampleFloatType intime = ((ResampleFloatType) srcindex) / finrate;
-        const double innexttime = ((double) (srcindex + 1)) / finrate;
+        const ResampleFloatType innexttime = ((ResampleFloatType) (srcindex + 1)) / finrate;
-        const double interpolation1 = 1.0 - ((innexttime - outtime) / (innexttime - intime));
+        const ResampleFloatType indeltatime = innexttime - intime;
        const ResampleFloatType interpolation1 = (indeltatime == 0.0f) ? 1.0f : (1.0f - ((innexttime - outtime) / indeltatime));
        const int filterindex1 = (int) (interpolation1 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
-        const double interpolation2 = 1.0 - interpolation1;
+        const ResampleFloatType interpolation2 = 1.0f - interpolation1;
        const int filterindex2 = (int) (interpolation2 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
        for (chan = 0; chan < chans; chan++) {
            float outsample = 0.0f;
            /* do this twice to calculate the sample, once for the "left wing" and then same for the right. */
-            /* !!! FIXME: do both wings in one loop */
+
-            for (j = 0; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
+            /* Left wing! split the "srcframe < 0" condition out into a preloop. */
            for (j = 0; srcindex < j; j++) {
                const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
                const int srcframe = srcindex - j;
-                /* !!! FIXME: we can bubble this conditional out of here by doing a pre loop. */
+                const float insample = lpadding[((paddinglen + srcframe) * chans) + chan];
-                const float insample = (srcframe < 0) ? lpadding[((paddinglen + srcframe) * chans) + chan] : inbuf[(srcframe * chans) + chan];
+                outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples])));
-                outsample += (float)(insample * (ResamplerFilter[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation1 * ResamplerFilterDifference[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
+            }
            /* Finish the left wing now that srcframe >= 0 */
            for (; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
                const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
                const int srcframe = srcindex - j;
                const float insample = inbuf[(srcframe * chans) + chan];
                outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples])));
            }
            /* Do the right wing! */
            for (j = 0; (filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
                const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
                const int srcframe = srcindex + 1 + j;
                /* !!! FIXME: we can bubble this conditional out of here by doing a post loop. */
                const float insample = (srcframe >= inframes) ? rpadding[((srcframe - inframes) * chans) + chan] : inbuf[(srcframe * chans) + chan];
-                outsample += (float)(insample * (ResamplerFilter[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation2 * ResamplerFilterDifference[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
+                outsample += (float)(insample * (ResamplerFilter[filterindex2 + jsamples] + (interpolation2 * ResamplerFilterDifference[filterindex2 + jsamples])));
            }
            *(dst++) = outsample;
        }