diff mbox series

[FFmpeg-devel] avfilter/f_ebur128: add all sample rates support

Message ID 20210304150553.8945-1-onemda@gmail.com
State New
Headers show
Series [FFmpeg-devel] avfilter/f_ebur128: add all sample rates support | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Paul B Mahol March 4, 2021, 3:05 p.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavfilter/f_ebur128.c | 73 ++++++++++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 27 deletions(-)

Comments

Clément Bœsch March 5, 2021, 1:36 p.m. UTC | #1
On Thu, Mar 04, 2021 at 04:05:53PM +0100, Paul B Mahol wrote:
[...]
> +    double f0 = 1681.974450955533;
> +    double G = 3.999843853973347;
> +    double Q = 0.7071752369554196;
> +
> +    double K = tan(M_PI * f0 / (double)inlink->sample_rate);
> +    double Vh = pow(10.0, G / 20.0);
> +    double Vb = pow(Vh, 0.4996667741545416);

Not a fan of these constants coming out of nowhere. Please add the
following comment in the code: "Unofficial reversed parametrization of pre
and RLB from 48kHz".

And in the commit description, please include the following:

    The magic constants come from the unofficial "ITU-R BS.1770-1 filter
    specifications"¹ by Raiden (libebur128) which relies on "Parameter
    Quantization in Direct-Form Recursive Audio Filters"² by Brian
    Neunaber.

    The constants seem to include a quantization bias, for example:
    - Vb is supposed to be exactly √Vh in a high shelf filter
    - the Pre-filter Gain should likely be 4dB
    - Pre Q and RLB Q are respectively very close to √½ and ½

    Those are not adjusted to prevent the values from drifting away from
    the official specifications.

    An alternative to this approach would be to requantize on the fly as
    proposed by pbelkner³, where the 48kHz code path would use the exact
    specifications constants while derivating constants for other
    frequencies.

    [1]: https://www.scribd.com/document/49991813/ITU-R-BS-1770-1-filters
    [2]: https://www.scribd.com/document/6531763/Direct-Form-Filter-Parameter-Quantization
    [3]: https://hydrogenaud.io/index.php?topic=86116.msg740092#msg740092

Feel free to add more information if you have any. This will likely save a
lot of time for anyone looking into understanding and improving that code
in the future.

> +
> +    double a0 = 1.0 + K / Q + K * K;
> +
> +    ebur128->pre_b[0] = (Vh + Vb * K / Q + K * K) / a0;
> +    ebur128->pre_b[1] = 2.0 * (K * K - Vh) / a0;
> +    ebur128->pre_b[2] = (Vh - Vb * K / Q + K * K) / a0;
> +    ebur128->pre_a[1] = 2.0 * (K * K - 1.0) / a0;
> +    ebur128->pre_a[2] = (1.0 - K / Q + K * K) / a0;
> +
> +    f0 = 38.13547087602444;
> +    Q = 0.5003270373238773;
> +    K = tan(M_PI * f0 / (double)inlink->sample_rate);
> +
> +    ebur128->rlb_b[0] = 1.0;
> +    ebur128->rlb_b[1] = -2.0;
> +    ebur128->rlb_b[2] = 1.0;

> +    ebur128->rlb_a[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K);
> +    ebur128->rlb_a[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K);

You can redefine a0 like with the pre filter and use it in these 2
expressions.

[...]

Regards,
diff mbox series

Patch

diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index e81520838d..55913bac1d 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -24,7 +24,6 @@ 
  * @see http://tech.ebu.ch/loudness
  * @see https://www.youtube.com/watch?v=iuEtQqC-Sqo "EBU R128 Introduction - Florian Camerer"
  * @todo implement start/stop/reset through filter command injection
- * @todo support other frequencies to avoid resampling
  */
 
 #include <math.h>
@@ -45,20 +44,6 @@ 
 
 #define MAX_CHANNELS 63
 
-/* pre-filter coefficients */
-#define PRE_B0  1.53512485958697
-#define PRE_B1 -2.69169618940638
-#define PRE_B2  1.19839281085285
-#define PRE_A1 -1.69065929318241
-#define PRE_A2  0.73248077421585
-
-/* RLB-filter coefficients */
-#define RLB_B0  1.0
-#define RLB_B1 -2.0
-#define RLB_B2  1.0
-#define RLB_A1 -1.99004745483398
-#define RLB_A2  0.99007225036621
-
 #define ABS_THRES    -70            ///< silence gate: we discard anything below this absolute (LUFS) threshold
 #define ABS_UP_THRES  10            ///< upper loud limit to consider (ABS_THRES being the minimum)
 #define HIST_GRAIN   100            ///< defines histogram precision
@@ -80,6 +65,7 @@  struct hist_entry {
 struct integrator {
     double *cache[MAX_CHANNELS];    ///< window of filtered samples (N ms)
     int cache_pos;                  ///< focus on the last added bin in the cache array
+    int cache_size;
     double sum[MAX_CHANNELS];       ///< sum of the last N ms filtered samples (cache content)
     int filled;                     ///< 1 if the cache is completely filled, 0 otherwise
     double rel_threshold;           ///< relative threshold
@@ -128,9 +114,11 @@  typedef struct EBUR128Context {
     double x[MAX_CHANNELS * 3];     ///< 3 input samples cache for each channel
     double y[MAX_CHANNELS * 3];     ///< 3 pre-filter samples cache for each channel
     double z[MAX_CHANNELS * 3];     ///< 3 RLB-filter samples cache for each channel
+    double pre_b[3];                ///< pre-filter numerator coefficients
+    double pre_a[3];                ///< pre-filter denominator coefficients
+    double rlb_b[3];                ///< rlb-filter numerator coefficients
+    double rlb_a[3];                ///< rlb-filter denominator coefficients
 
-#define I400_BINS  (48000 * 4 / 10)
-#define I3000_BINS (48000 * 3)
     struct integrator i400;         ///< 400ms integrator, used for Momentary loudness  (M), and Integrated loudness (I)
     struct integrator i3000;        ///<    3s integrator, used for Short term loudness (S), and Loudness Range      (LRA)
 
@@ -388,6 +376,32 @@  static int config_audio_input(AVFilterLink *inlink)
     AVFilterContext *ctx = inlink->dst;
     EBUR128Context *ebur128 = ctx->priv;
 
+    double f0 = 1681.974450955533;
+    double G = 3.999843853973347;
+    double Q = 0.7071752369554196;
+
+    double K = tan(M_PI * f0 / (double)inlink->sample_rate);
+    double Vh = pow(10.0, G / 20.0);
+    double Vb = pow(Vh, 0.4996667741545416);
+
+    double a0 = 1.0 + K / Q + K * K;
+
+    ebur128->pre_b[0] = (Vh + Vb * K / Q + K * K) / a0;
+    ebur128->pre_b[1] = 2.0 * (K * K - Vh) / a0;
+    ebur128->pre_b[2] = (Vh - Vb * K / Q + K * K) / a0;
+    ebur128->pre_a[1] = 2.0 * (K * K - 1.0) / a0;
+    ebur128->pre_a[2] = (1.0 - K / Q + K * K) / a0;
+
+    f0 = 38.13547087602444;
+    Q = 0.5003270373238773;
+    K = tan(M_PI * f0 / (double)inlink->sample_rate);
+
+    ebur128->rlb_b[0] = 1.0;
+    ebur128->rlb_b[1] = -2.0;
+    ebur128->rlb_b[2] = 1.0;
+    ebur128->rlb_a[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K);
+    ebur128->rlb_a[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K);
+
     /* Force 100ms framing in case of metadata injection: the frames must have
      * a granularity of the window overlap to be accurately exploited.
      * As for the true peaks mode, it just simplifies the resampling buffer
@@ -418,6 +432,9 @@  static int config_audio_output(AVFilterLink *outlink)
     if (!ebur128->ch_weighting)
         return AVERROR(ENOMEM);
 
+#define I400_BINS  (outlink->sample_rate * 4 / 10)
+#define I3000_BINS (outlink->sample_rate * 3)
+
     for (i = 0; i < nb_channels; i++) {
         /* channel weighting */
         const uint64_t chl = av_channel_layout_extract_channel(outlink->channel_layout, i);
@@ -433,8 +450,10 @@  static int config_audio_output(AVFilterLink *outlink)
             continue;
 
         /* bins buffer for the two integration window (400ms and 3s) */
-        ebur128->i400.cache[i]  = av_calloc(I400_BINS,  sizeof(*ebur128->i400.cache[0]));
-        ebur128->i3000.cache[i] = av_calloc(I3000_BINS, sizeof(*ebur128->i3000.cache[0]));
+        ebur128->i400.cache_size = I400_BINS;
+        ebur128->i3000.cache_size = I3000_BINS;
+        ebur128->i400.cache[i]  = av_calloc(ebur128->i400.cache_size,  sizeof(*ebur128->i400.cache[0]));
+        ebur128->i3000.cache[i] = av_calloc(ebur128->i3000.cache_size, sizeof(*ebur128->i3000.cache[0]));
         if (!ebur128->i400.cache[i] || !ebur128->i3000.cache[i])
             return AVERROR(ENOMEM);
     }
@@ -613,7 +632,8 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
 
 #define MOVE_TO_NEXT_CACHED_ENTRY(time) do {                \
     ebur128->i##time.cache_pos++;                           \
-    if (ebur128->i##time.cache_pos == I##time##_BINS) {     \
+    if (ebur128->i##time.cache_pos ==                       \
+        ebur128->i##time.cache_size) {                      \
         ebur128->i##time.filled    = 1;                     \
         ebur128->i##time.cache_pos = 0;                     \
     }                                                       \
@@ -634,20 +654,20 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
                 continue;
 
             /* Y[i] = X[i]*b0 + X[i-1]*b1 + X[i-2]*b2 - Y[i-1]*a1 - Y[i-2]*a2 */
-#define FILTER(Y, X, name) do {                                                 \
+#define FILTER(Y, X, NUM, DEN) do {                                             \
             double *dst = ebur128->Y + ch*3;                                    \
             double *src = ebur128->X + ch*3;                                    \
             dst[2] = dst[1];                                                    \
             dst[1] = dst[0];                                                    \
-            dst[0] = src[0]*name##_B0 + src[1]*name##_B1 + src[2]*name##_B2     \
-                                      - dst[1]*name##_A1 - dst[2]*name##_A2;    \
+            dst[0] = src[0]*NUM[0] + src[1]*NUM[1] + src[2]*NUM[2]              \
+                                   - dst[1]*DEN[1] - dst[2]*DEN[2];             \
 } while (0)
 
             // TODO: merge both filters in one?
-            FILTER(y, x, PRE);  // apply pre-filter
+            FILTER(y, x, ebur128->pre_b, ebur128->pre_a);  // apply pre-filter
             ebur128->x[ch * 3 + 2] = ebur128->x[ch * 3 + 1];
             ebur128->x[ch * 3 + 1] = ebur128->x[ch * 3    ];
-            FILTER(z, y, RLB);  // apply RLB-filter
+            FILTER(z, y, ebur128->rlb_b, ebur128->rlb_a);  // apply RLB-filter
 
             bin = ebur128->z[ch * 3] * ebur128->z[ch * 3];
 
@@ -896,7 +916,6 @@  static int query_formats(AVFilterContext *ctx)
     int ret;
 
     static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE };
-    static const int input_srate[] = {48000, -1}; // ITU-R BS.1770 provides coeff only for 48kHz
     static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE };
 
     /* set optional output video format */
@@ -920,7 +939,7 @@  static int query_formats(AVFilterContext *ctx)
         (ret = ff_channel_layouts_ref(layouts, &outlink->incfg.channel_layouts)) < 0)
         return ret;
 
-    formats = ff_make_format_list(input_srate);
+    formats = ff_all_samplerates();
     if ((ret = ff_formats_ref(formats, &inlink->outcfg.samplerates)) < 0 ||
         (ret = ff_formats_ref(formats, &outlink->incfg.samplerates)) < 0)
         return ret;