diff mbox series

[FFmpeg-devel,WIP,1/1] avcodec/aacenc: improve bit_rate_tolerance=0

Message ID 517a57ef18398a39952fcc1d02e51ef423cf80ae.1726262133.git.pav@iki.fi
State New
Headers show
Series avcodec/aacenc: improve bit_rate_tolerance=0 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Pauli Virtanen Sept. 13, 2024, 9:15 p.m. UTC
bit_rate_tolerance=0 has a few problems:

- infinite loop if frame_bits doesn't become small enough for any lambda
- bad quality, as it's never increasing lambda above the initial value
- not doing the restoring of coeffs after adjusting lambda

Attempt to address these:

- target bitrate a bit below frame_bits cap with the usual code path
- if frame_bits exceeds cap, find good lambda with a zero finding method

The zero finding usually converges in 1-3 iterations.

Remaining problems:

- instead of the infinite loop, we now silently return the too large
  frame, and let the caller handle it. This is still a bug, but fixing it
  needs something else than playing with lambda.

- it appears the resulting frame_bits depends also on some other state
  than s->lambda. iteration with lambda1, lambda2>lambda1, and then again
  with lambda1 produces different frame_bits on the two lambda1
  iterations. In this case the root finding can fail, as it cannot any
  more return to a previous "good" lambda.

The sound quality from this patch with bit_rate_tolerance=0 is improved,
as it now maintains sufficient bitrate closer to the target.  Encoding
is also faster now that less re-encoding is done:

Before: ffmpeg -i sample.flac -c aac -b:a 200k -bt:a 0 -y before.aac
size=    2776KiB time=00:03:10.98 bitrate= 119.1kbits/s speed=14.4x

After: ffmpeg -i sample.flac -c aac -b:a 200k -bt:a 0 -y after.aac
size=    3897KiB time=00:03:10.98 bitrate= 167.1kbits/s speed=23.6x

Signed-off-by: Pauli Virtanen <pav@iki.fi>
---
 libavcodec/aacenc.c | 153 +++++++++++++++++++++++++++++++++++---------
 libavcodec/aacenc.h |   1 +
 2 files changed, 124 insertions(+), 30 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 88037c7f87..ffa72be217 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -826,6 +826,58 @@  static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
     }
 }
 
+/**
+ * Finding zero of function f(x).
+ * The initial zero bracketing assumes f(x) is increasing.
+ */
+typedef struct FindZero {
+    int init;      ///< bitmask of whether x[0] (0x1) and x[1] (0x2) are valid
+    float x[2];    ///< zero of f(x) is in interval (x[0], x[1])
+    float f[2];    ///< interpolation values
+    int i;         ///< which x[i] is latest
+    float b;       ///< bracketing multiplier
+} FindZero;
+
+/** Return next x to evaluate f(x) at to approach the zero. */
+static float find_zero_next(FindZero *r, float x, float f)
+{
+    if (r->init != 0x3) {
+        /* Bracket the zero, assuming x > 0 and f(x) is increasing */
+        r->b = FFMIN(2 + 2 * r->b, 65536.0f);
+        if (f < 0) {
+            r->x[0] = x;
+            r->f[0] = f;
+            r->init |= 0x1;
+            if (r->init != 0x3)
+                return x * r->b;
+        } else {
+            r->x[1] = x;
+            r->f[1] = f;
+            r->init |= 0x2;
+            if (r->init != 0x3)
+                return x / r->b;
+        }
+        r->i = 1;
+    } else {
+        /* Anderson-Bjoerck false position method */
+        if ((f < 0) != (r->f[r->i] < 0)) {
+            r->i = !r->i;
+        } else {
+            float m = 1 - (float)f / r->f[r->i];
+
+            if (m <= 0)
+                m = 0.5f;
+
+            r->f[!r->i] *= m;
+        }
+
+        r->x[r->i] = x;
+        r->f[r->i] = f;
+    }
+
+    return (r->x[0] * r->f[1] - r->x[1] * r->f[0]) / (r->f[1] - r->f[0]);
+}
+
 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
 {
@@ -839,6 +891,7 @@  static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
     int chan_el_counter[4];
     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
+    FindZero find_lambda = { 0 };
 
     /* add current frame to queue */
     if (frame) {
@@ -1100,32 +1153,58 @@  static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
          */
         frame_bits = put_bits_count(&s->pb);
         rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
-        rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
+        rate_bits = FFMIN(rate_bits, s->max_frame_bits);
         too_many_bits = FFMAX(target_bits, rate_bits);
-        too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
+        too_many_bits = FFMIN(too_many_bits, s->max_frame_bits);
         too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
 
-        /* When strict bit-rate control is demanded */
-        if (avctx->bit_rate_tolerance == 0) {
-            if (rate_bits < frame_bits) {
-                float ratio = ((float)rate_bits) / frame_bits;
-                s->lambda *= FFMIN(0.9f, ratio);
-                continue;
-            }
-            /* reset lambda when solution is found */
-            s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
-            break;
-        }
-
         /* When using ABR, be strict (but only for increasing) */
         too_few_bits = too_few_bits - too_few_bits/8;
         too_many_bits = too_many_bits + too_many_bits/2;
 
-        if (   its == 0 /* for steady-state Q-scale tracking */
-            || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
-            || frame_bits >= 6144 * s->channels - 3  )
+        av_log(NULL, AV_LOG_TRACE,
+               "%08d: frame_bits:%d max:%d lambda:%f its:%d%s\n",
+               s->lambda_count, frame_bits, s->max_frame_bits, s->lambda, its,
+               (frame_bits > s->max_frame_bits) ? " BAD" : its ? " RETRY" : "");
+
+        if (frame_bits >= s->max_frame_bits || find_lambda.init) {
+            /* Search for lambda with frame_bits == rate_bits < max_frame_bits */
+            float lambda;
+            int value = frame_bits - rate_bits;
+            int value_max = s->max_frame_bits - rate_bits;
+
+            lambda = find_zero_next(&find_lambda, s->lambda, value);
+            lambda = av_clipf(lambda, FLT_EPSILON, 65536.f);
+
+            /* Close enough? */
+            if (value < value_max && (value > -rate_bits / 20 ||
+                                      value > too_few_bits - rate_bits ||
+                                      fabsf(lambda - s->lambda) < 0.05f * fabsf(lambda)))
+                break;
+
+            if (its > 10 || s->lambda == lambda) {
+                /* Not making enough progress, use whatever we have now. */
+                if (value < value_max)
+                    break;
+
+                if (!(find_lambda.init & 0x1)) {
+                    /* Could't find any lambda that gives a small enough frame.
+                     * Give up, produce the bad frame, and reset lambda for next.
+                     */
+                    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
+                    break;
+                }
+
+                lambda = find_lambda.x[0] * 0.9f;
+                memset(&find_lambda, 0, sizeof(find_lambda));
+            }
+
+            s->lambda = lambda;
+        } else if (   its == 0 /* for steady-state Q-scale tracking */
+                   || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits)))
         {
             float ratio = ((float)rate_bits) / frame_bits;
+            float prev_lambda = s->lambda;
 
             if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
                 /*
@@ -1142,24 +1221,27 @@  static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             }
             s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
 
+            /* Give up if we're not making progress. */
+            if (s->lambda == prev_lambda)
+                break;
+
             /* Keep iterating if we must reduce and lambda is in the sky */
-            if (ratio > 0.9f && ratio < 1.1f) {
+            if (ratio > 0.9f && ratio < 1.1f)
                 break;
-            } else {
-                if (is_mode || ms_mode || tns_mode || pred_mode) {
-                    for (i = 0; i < s->chan_map[0]; i++) {
-                        // Must restore coeffs
-                        chans = tag == TYPE_CPE ? 2 : 1;
-                        cpe = &s->cpe[i];
-                        for (ch = 0; ch < chans; ch++)
-                            memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
-                    }
-                }
-                its++;
-            }
         } else {
             break;
         }
+
+        if (is_mode || ms_mode || tns_mode || pred_mode) {
+            for (i = 0; i < s->chan_map[0]; i++) {
+                // Must restore coeffs
+                chans = tag == TYPE_CPE ? 2 : 1;
+                cpe = &s->cpe[i];
+                for (ch = 0; ch < chans; ch++)
+                    memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
+            }
+        }
+        its++;
     } while (1);
 
     if (s->options.ltp && s->coder->ltp_insert_new_frame)
@@ -1302,6 +1384,17 @@  static av_cold int aac_encode_init(AVCodecContext *avctx)
     avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
                                      avctx->bit_rate);
 
+    /* Strict bitrate limiting (custom maximum bits per frame).
+     * Reduce target bitrate below the limit to avoid frequent re-encoding.
+     */
+    if (avctx->bit_rate_tolerance == 0) {
+        s->max_frame_bits = FFMAX(744 * s->channels - 3,
+                                  avctx->bit_rate * 1024 / avctx->sample_rate);
+        avctx->bit_rate = (int64_t)avctx->bit_rate * 85 / 100;
+    } else {
+        s->max_frame_bits = 6144 * s->channels - 3;
+    }
+
     /* Profile and option setting */
     avctx->profile = avctx->profile == AV_PROFILE_UNKNOWN ? AV_PROFILE_AAC_LOW :
                      avctx->profile;
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index ae15f91e06..d3342d2f6e 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -228,6 +228,7 @@  typedef struct AACEncContext {
     int channels;                                ///< channel count
     const uint8_t *reorder_map;                  ///< lavc to aac reorder map
     const uint8_t *chan_map;                     ///< channel configuration map
+    int max_frame_bits;                          ///< maximum bits per frame (0=default)
 
     ChannelElement *cpe;                         ///< channel elements
     FFPsyContext psy;