[FFmpeg-devel] avfilter: add afftdn filter

Submitted by Paul B Mahol on Sept. 21, 2018, 8:50 p.m.

Details

Message ID 20180921205037.25510-1-onemda@gmail.com
State New
Headers show

Commit Message

Paul B Mahol Sept. 21, 2018, 8:50 p.m.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 configure                |    3 +
 doc/filters.texi         |   44 ++
 libavfilter/Makefile     |    1 +
 libavfilter/af_afftdn.c  | 1050 ++++++++++++++++++++++++++++++++++++++
 libavfilter/allfilters.c |    1 +
 5 files changed, 1099 insertions(+)
 create mode 100644 libavfilter/af_afftdn.c

Patch hide | download patch | download mbox

diff --git a/configure b/configure
index 25120337de..e71f1818d8 100755
--- a/configure
+++ b/configure
@@ -3339,6 +3339,8 @@  libssh_protocol_deps="libssh"
 libtls_conflict="openssl gnutls mbedtls"
 
 # filters
+afftdn_filter_deps="avcodec"
+afftdn_filter_select="fft"
 afftfilt_filter_deps="avcodec"
 afftfilt_filter_select="fft"
 afir_filter_deps="avcodec"
@@ -6862,6 +6864,7 @@  done
 enabled zlib && add_cppflags -DZLIB_CONST
 
 # conditional library dependencies, in any order
+enabled afftdn_filter       && prepend avfilter_deps "avcodec"
 enabled afftfilt_filter     && prepend avfilter_deps "avcodec"
 enabled afir_filter         && prepend avfilter_deps "avcodec"
 enabled amovie_filter       && prepend avfilter_deps "avformat avcodec"
diff --git a/doc/filters.texi b/doc/filters.texi
index 73de0fbea7..bbe900d5e5 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -974,6 +974,50 @@  afade=t=out:st=875:d=25
 @end example
 @end itemize
 
+@section afftdn
+Denoise audio samples with FFT.
+
+A description of the accepted parameters follows.
+
+@table @option
+@item nr
+Set the noise reduction in dB, allowed range is 0.01 to 97.
+Default value is 12 dB.
+
+@item nf
+Set the noise floor in dB, allowed range is 20 to 80.
+Default value is 50 dB.
+
+@item nt
+Set the noise type.
+
+It accepts the following values:
+@table @option
+@item w
+Select white noise.
+
+@item v
+Select vinyl noise.
+
+@item s
+Select shellac noise.
+
+Default value is white noise.
+@end table
+
+@item nb
+Set the noise balance. Default value is 0dB.
+
+@item rf
+Set the residual floor. Default value is 38 dB.
+
+@item tn
+Enable noise tracking. By default is disabled.
+
+@item tr
+Enable residual tracking. By default is disabled.
+@end table
+
 @section afftfilt
 Apply arbitrary expressions to samples in frequency domain.
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 67e20cc858..62cc2f561f 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -46,6 +46,7 @@  OBJS-$(CONFIG_AECHO_FILTER)                  += af_aecho.o
 OBJS-$(CONFIG_AEMPHASIS_FILTER)              += af_aemphasis.o
 OBJS-$(CONFIG_AEVAL_FILTER)                  += aeval.o
 OBJS-$(CONFIG_AFADE_FILTER)                  += af_afade.o
+OBJS-$(CONFIG_AFFTDN_FILTER)                 += af_afftdn.o
 OBJS-$(CONFIG_AFFTFILT_FILTER)               += af_afftfilt.o
 OBJS-$(CONFIG_AFIR_FILTER)                   += af_afir.o
 OBJS-$(CONFIG_AFORMAT_FILTER)                += af_aformat.o
diff --git a/libavfilter/af_afftdn.c b/libavfilter/af_afftdn.c
new file mode 100644
index 0000000000..aacf6a5714
--- /dev/null
+++ b/libavfilter/af_afftdn.c
@@ -0,0 +1,1050 @@ 
+/*
+ * Copyright (c) 2018 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <float.h>
+
+#include "libavutil/audio_fifo.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "libavcodec/avfft.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+
+#define C (M_LN10 * 0.1)
+
+enum NoiseType {
+    WHITE_NOISE,
+    VINYL_NOISE,
+    SHELLAC_NOISE,
+    NB_NOISE
+};
+
+typedef struct DenoiseChannel {
+    double *amt;
+    double *band_amt;
+    double *band_excit;
+    double *gain;
+    double *prior;
+    double *prior_band_excit;
+    double *clean_data;
+    double *noisy_data;
+    double *out_samples;
+    double *spread_function;
+    FFTComplex *fft_data;
+    FFTContext *fft, *ifft;
+} DenoiseChannel;
+
+typedef struct AudioFFTDenoiseContext {
+    const AVClass *class;
+
+    float noise_reduction;
+    float noise_floor;
+    int noise_type;
+    float noise_balance;
+    float residual_floor;
+    int track_noise;
+    int track_residual;
+
+    float last_residual_floor;
+    float last_noise_floor;
+    float last_noise_reduction;
+    float last_noise_balance;
+    int64_t block_count;
+
+    int64_t pts;
+    int channels;
+    float sample_rate;
+    int buffer_length;
+    int fft_length;
+    int fft_length2;
+    int bin_count;
+    int window_length;
+    int sample_advance;
+    int number_of_bands;
+
+    int band_noise[15];
+    int band_centre[15];
+
+    int *bin2band;
+    double *window;
+    double *band_alpha;
+    double *band_beta;
+    double *abs_var;
+    double *rel_var;
+    double *min_abs_var;
+
+    DenoiseChannel *dnch;
+
+    double max_gain;
+    double window_weight;
+    double floor;
+    double sample_floor;
+    double auto_floor;
+    double sfm_threshold;
+    double sfm_alpha;
+
+    int    noise_band_edge[17];
+    int    noise_band_count;
+    double matrix_a[25];
+    double vector_b[5];
+    double matrix_b[75];
+    double matrix_c[75];
+
+    double noise_band_norm[15];
+    double noise_band_avr[15];
+    double noise_band_avi[15];
+    double noise_band_var[15];
+    double sfm_results[3];
+    int    sfm_fail_flags[512];
+    int    sfm_fail_total;
+    double noise_band_auto_var[15];
+    double noise_band_sample[15];
+
+    AVAudioFifo *fifo;
+} AudioFFTDenoiseContext;
+
+#define OFFSET(x) offsetof(AudioFFTDenoiseContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption afftdn_options[] = {
+    { "nr", "set the noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT, {.dbl = 12},.01, 97, A },
+    { "nf", "set the noise floor",     OFFSET(noise_floor),     AV_OPT_TYPE_FLOAT, {.dbl = 50}, 20, 80, A },
+    { "nt", "set the noise type",      OFFSET(noise_type),      AV_OPT_TYPE_INT,   {.i64 =  0},  WHITE_NOISE,  NB_NOISE-1, A, "type" },
+    {  "w", "white noise",             0,                       AV_OPT_TYPE_CONST, {.i64 = WHITE_NOISE},   0,  0, A, "type" },
+    {  "v", "vinyl noise",             0,                       AV_OPT_TYPE_CONST, {.i64 = VINYL_NOISE},   0,  0, A, "type" },
+    {  "s", "shellac noise",           0,                       AV_OPT_TYPE_CONST, {.i64 = SHELLAC_NOISE}, 0,  0, A, "type" },
+    { "nb", "set the noise balance",   OFFSET(noise_balance),   AV_OPT_TYPE_FLOAT, {.dbl =  0},-21, 21, A },
+    { "rf", "set the residual floor",  OFFSET(residual_floor),  AV_OPT_TYPE_FLOAT, {.dbl = 38}, 20, 80, A },
+    { "tn", "track noise",             OFFSET(track_noise),     AV_OPT_TYPE_BOOL,  {.i64 =  0},  0,  1, A },
+    { "tr", "track residual",          OFFSET(track_residual),  AV_OPT_TYPE_BOOL,  {.i64 =  0},  0,  1, A },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(afftdn);
+
+static int get_band_noise(AudioFFTDenoiseContext *s,
+                          int band, double a,
+                          double b, double c)
+{
+    double d1, d2, d3;
+
+    d1 = a / s->band_centre[band];
+    d1 = 10.0 * log(1.0 + d1 * d1) / log(10.0);
+    d2 = b / s->band_centre[band];
+    d2 = 10.0 * log(1.0 + d2 * d2) / log(10.0);
+    d3 = s->band_centre[band] / c;
+    d3 = 10.0 * log(1.0 + d3 * d3) / log(10.0);
+
+    return lrintf(-d1 + d2 - d3);
+}
+
+static void factor(double *array, int size)
+{
+    for (int i = 0; i < size - 1; i++) {
+        for (int j = i + 1; j < size; j++) {
+            double d = array[j + i * size] / array[i + i * size];
+
+            array[j + i * size] = d;
+            for (int k = i + 1; k < size; k++) {
+                array[j + k * size] -= d * array[i + k * size];
+            }
+        }
+    }
+}
+
+static void solve(double *matrix, double *vector, int size)
+{
+    double d;
+
+    for (int i = 0; i < size - 1; i++) {
+        for (int j = i + 1; j < size; j++) {
+            d = matrix[j + i * size];
+            vector[j] -= d * vector[i];
+        }
+    }
+
+    vector[(size - 1)] /= matrix[(size * size - 1)];
+
+    for (int i = size - 2; i >= 0; i--) {
+        d = vector[i];
+        for (int j = i + 1; j < size; j++)
+            d -= matrix[i + j * size] * vector[j];
+        vector[i] = d / matrix[(i + i * size)];
+    }
+}
+
+static int process_get_band_noise(AudioFFTDenoiseContext *s, int band)
+{
+    double product, sum, f;
+    int i = 0;
+
+    if (band < 15)
+        return s->band_noise[band];
+
+    for (int j = 0; j < 5; j++) {
+        sum = 0.0;
+        for (int k = 0; k < 15; k++)
+            sum += s->matrix_b[i++] * s->band_noise[k];
+        s->vector_b[j] = sum;
+    }
+    solve(s->matrix_a, s->vector_b, 5);
+    f = (0.5 * s->sample_rate) / s->band_centre[14];
+    f = 15.0 + log(f / 1.5) / log(1.5);
+    sum = 0.0;
+    product = 1.0;
+    for (int j = 0; j < 5; j++) {
+        sum += product * s->vector_b[j];
+        product *= f;
+    }
+    return lrintf(sum);
+}
+
+static void calculate_sfm(AudioFFTDenoiseContext *s, DenoiseChannel *dnch,
+                          int start, int end)
+{
+    double d1 = 0.0, d2 = 1.0;
+    int i = 0, j = 0;
+
+    for (int k = start; k < end; k++) {
+        if (dnch->noisy_data[k] > s->sample_floor) {
+            j++;
+            d1 += dnch->noisy_data[k];
+            d2 *= dnch->noisy_data[k];
+            if (d2 > 1.0E100) {
+                d2 *= 1.0E-100;
+                i++;
+            } else if (d2 < 1.0E-100) {
+                d2 *= 1.0E100;
+                i--;
+            }
+        }
+    }
+    if (j > 1) {
+        d1 /= j;
+        s->sfm_results[0] = d1;
+        d2 = log(d2) + 230.2585 * i;
+        d2 /= j;
+        d1 = log(d1);
+        s->sfm_results[1] = d1;
+        s->sfm_results[2] = (d1 - d2);
+    } else {
+        s->sfm_results[0] = s->auto_floor;
+        s->sfm_results[1] = s->sfm_threshold;
+        s->sfm_results[2] = s->sfm_threshold;
+    }
+}
+
+static double limit_gain(double a, double b)
+{
+    if (a > 1.0)
+        return (b * a - 1.0) / (b + a - 2.0);
+    if (a < 1.0)
+        return (b * a - 2.0 * a + 1.0) / (b - a);
+    return 1.0;
+}
+
+static void process_frame(AudioFFTDenoiseContext *s, DenoiseChannel *dnch,
+                          FFTComplex *fft_data,
+                          double *prior, double *prior_band_excit, int track_noise)
+{
+    double d1, d2, d3, d4;
+    int i, j, n, i1, i2;
+
+    d1 = fft_data[0].re * fft_data[0].re;
+    dnch->noisy_data[0] = d1;
+    d2 = d1 / s->abs_var[0];
+    d3 = 0.98 * prior[0] + 0.02 * fmax(d2 - 1.0, 0.0);
+    d4 = d3 / (1.0 + d3);
+    d4 *= (d4 + M_PI_4 / fmax(d2, 1.0E-6));
+    prior[0] = (d2 * d4);
+    dnch->clean_data[0] = (d1 * d4);
+    d4 = sqrt(d4);
+    dnch->gain[0] = d4;
+    i = 1;
+    j = 1;
+    n = 0;
+    for (i2 = 1; i2 < s->fft_length2; i2++) {
+        d1 = fft_data[i].re * fft_data[i].re + fft_data[i].im * fft_data[i].im;
+        if (d1 > s->sample_floor)
+            n = i2;
+
+        dnch->noisy_data[i2] = d1;
+        d2 = d1 / s->abs_var[i2];
+        d3 = 0.98 * prior[i2] + 0.02 * fmax(d2 - 1.0, 0.0);
+        d4 = d3 / (1.0 + d3);
+        d4 *= (d4 + M_PI_4 / fmax(d2, 1.0E-6));
+        prior[i2] = d2 * d4;
+        dnch->clean_data[i2] = d1 * d4;
+        d4 = sqrt(d4);
+        dnch->gain[i2] = d4;
+        i += 1;
+    }
+    d1 = fft_data[0].im * fft_data[0].im;
+    if (d1 > s->sample_floor)
+        n = s->fft_length2;
+
+    dnch->noisy_data[s->fft_length2] = d1;
+    d2 = d1 / s->abs_var[s->fft_length2];
+    d3 = 0.98 * prior[s->fft_length2] + 0.02 * fmax(d2 - 1.0, 0.0);
+    d4 = d3 / (1.0 + d3);
+    d4 *= d4 + M_PI_4 / fmax(d2, 1.0E-6);
+    prior[s->fft_length2] = d2 * d4;
+    dnch->clean_data[s->fft_length2] = d1 * d4;
+    d4 = sqrt(d4);
+    dnch->gain[s->fft_length2] = d4;
+    if (n > s->fft_length2 - 2) {
+        n = s->bin_count;
+        i1 = s->noise_band_count;
+    } else {
+        i1 = 0;
+        for (i2 = 0; i2 <= s->noise_band_count; i2++) {
+            if (n > 1.1 * s->noise_band_edge[i2]) {
+                i1 = i2;
+            }
+        }
+    }
+
+    if (track_noise && (i1 > s->noise_band_count / 2)) {
+        j = FFMIN(n, s->noise_band_edge[i1]);
+        int m = 3, k;
+        for (k = i1 - 1; k >= 0; k--) {
+            i = s->noise_band_edge[k];
+            calculate_sfm(s, dnch, i, j);
+            s->noise_band_sample[k] = s->sfm_results[0];
+            if (s->sfm_results[2] + 0.013 * m * fmax(0.0, s->sfm_results[1] - 20.53) >= s->sfm_threshold) {
+                break;
+            }
+            j = i;
+            m++;
+        }
+        if (k < i1 - 1) {
+            double d8 = 0.0, d6;
+            for (int i5 = i1 - 1; i5 > k; i5--) {
+                d6 = log(s->noise_band_sample[i5] / s->noise_band_auto_var[i5]);
+                d8 += d6;
+            }
+            i = i1 - k - 1;
+            if (i < 5) {
+                d6 = 3.0E-4 * i * i;
+            } else {
+                d6 = 3.0E-4 * (8 * i - 16);
+            }
+            double d7;
+            if (i < 3) {
+                d7 = 2.0E-4 * i * i;
+            } else {
+                d7 = 2.0E-4 * (4 * i - 4);
+            }
+            if (s->track_residual) {
+                if (s->last_noise_floor > s->last_residual_floor + 9) {
+                    d6 *= 0.5;
+                    d7 *= 0.75;
+                } else if (s->last_noise_floor > s->last_residual_floor + 6) {
+                    d6 *= 0.4;
+                    d7 *= 1.0;
+                } else if (s->last_noise_floor > s->last_residual_floor + 4) {
+                    d6 *= 0.3;
+                    d7 *= 1.3;
+                } else if (s->last_noise_floor > s->last_residual_floor + 2) {
+                    d6 *= 0.2;
+                    d7 *= 1.6;
+                } else if (s->last_noise_floor > s->last_residual_floor) {
+                    d6 *= 0.1;
+                    d7 *= 2.0;
+                } else {
+                    d6 = 0.0;
+                    d7 *= 2.5;
+                }
+            }
+            d8 = fmin(fmax(d8, -d6), d7);
+            d6 = exp(d8);
+            for (int i5 = 0; i5 < 15; i5++) {
+                s->noise_band_auto_var[i5] *= d6;
+            }
+        } else if (s->sfm_results[2] >= s->sfm_threshold) {
+            s->sfm_fail_flags[s->block_count & 0x1FF] = 1;
+            s->sfm_fail_total += 1;
+        }
+    }
+
+    for (int i = 0; i < s->number_of_bands; i++) {
+        dnch->band_excit[i] = 0.0;
+        dnch->band_amt[i] = 0.0;
+    }
+
+    for (int i = 0; i < s->bin_count; i++) {
+        dnch->band_excit[s->bin2band[i]] += dnch->clean_data[i];
+    }
+
+    for (int i = 0; i < s->number_of_bands; i++) {
+        dnch->band_excit[i] = fmax(dnch->band_excit[i],
+                                s->band_alpha[i] * dnch->band_excit[i] +
+                                s->band_beta[i] * prior_band_excit[i]);
+        prior_band_excit[i] = dnch->band_excit[i];
+    }
+
+    i = 0;
+    for (int j = 0; j < s->number_of_bands; j++) {
+        for (int k = 0; k < s->number_of_bands; k++) {
+            dnch->band_amt[j] += dnch->spread_function[i++] * dnch->band_excit[k];
+        }
+    }
+
+    for (int i = 0; i < s->bin_count; i++) {
+        dnch->amt[i] = dnch->band_amt[s->bin2band[i]];
+    }
+
+    if (dnch->amt[0] > s->abs_var[0]) {
+        dnch->gain[0] = 1.0;
+    } else if (dnch->amt[0] > s->min_abs_var[0]) {
+        double limit = sqrt(s->abs_var[0] / dnch->amt[0]);
+        dnch->gain[0] = limit_gain(dnch->gain[0], limit);
+    } else {
+        dnch->gain[0] = limit_gain(dnch->gain[0], s->max_gain);
+    }
+    if (dnch->amt[s->fft_length2] > s->abs_var[s->fft_length2]) {
+        dnch->gain[s->fft_length2] = 1.0;
+    } else if (dnch->amt[s->fft_length2] > s->min_abs_var[s->fft_length2]) {
+        double limit = sqrt(s->abs_var[s->fft_length2] / dnch->amt[s->fft_length2]);
+        dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], limit);
+    } else {
+        dnch->gain[s->fft_length2] = limit_gain(dnch->gain[s->fft_length2], s->max_gain);
+    }
+    for (int i = 1; i < s->fft_length2; i++) {
+        if (dnch->amt[i] > s->abs_var[i]) {
+            dnch->gain[i] = 1.0;
+        } else if (dnch->amt[i] > s->min_abs_var[i]) {
+            double limit = sqrt(s->abs_var[i] / dnch->amt[i]);
+            dnch->gain[i] = limit_gain(dnch->gain[i], limit);
+        } else {
+            dnch->gain[i] = limit_gain(dnch->gain[i], s->max_gain);
+        }
+    }
+    d4 = dnch->gain[0];
+    dnch->clean_data[0] = (d4 * d4 * dnch->noisy_data[0]);
+    fft_data[0].re *= d4;
+    d4 = dnch->gain[s->fft_length2];
+    dnch->clean_data[s->fft_length2] = (d4 * d4 * dnch->noisy_data[s->fft_length2]);
+    fft_data[0].im *= d4;
+    i = 1;
+    for (int j = 1; j < s->fft_length2; j++) {
+        d4 = dnch->gain[j];
+        dnch->clean_data[j] = (d4 * d4 * dnch->noisy_data[j]);
+        fft_data[i].re *= d4;
+        fft_data[i++].im *= d4;
+    }
+}
+
+static double freq2bark(double x)
+{
+    double d = x / 7500.0;
+    return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
+}
+
+static int get_band_centre(AudioFFTDenoiseContext *s, int band)
+{
+    if (band == -1)
+        return lrintf(s->band_centre[0] / 1.5);
+
+    return s->band_centre[band];
+}
+
+static int get_band_edge(AudioFFTDenoiseContext *s, int band)
+{
+    int i;
+
+    if (band == 15) {
+        i = lrintf(s->band_centre[14] * 1.224745);
+    } else {
+        i = lrintf(s->band_centre[band] / 1.224745);
+    }
+    return FFMIN(i, s->sample_rate / 2);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioFFTDenoiseContext *s = ctx->priv;
+    double sdiv, d1, d2, d3, d4, d5, d6, d8, d9, sum;
+    double min, max;
+    int i, j, k, m, n;
+
+    s->dnch = av_calloc(inlink->channels, sizeof(*s->dnch));
+    if (!s->dnch)
+        return AVERROR(ENOMEM);
+
+    s->pts = AV_NOPTS_VALUE;
+    s->channels = inlink->channels;
+    s->sample_rate = inlink->sample_rate;
+    s->sample_advance = s->sample_rate / 80;
+    s->window_length = 3 * s->sample_advance;
+    s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
+    s->fft_length = s->fft_length2 * 2;
+    s->buffer_length = s->fft_length * 2;
+    s->bin_count = s->fft_length2 + 1;
+
+    s->band_centre[0] = 80;
+    for (i = 1; i < 15; i++) {
+        s->band_centre[i] = lrintf(1.5 * s->band_centre[(i - 1)] + 5.0);
+        if (s->band_centre[i] < 1000) {
+            s->band_centre[i] = (10 * (s->band_centre[i] / 10));
+        } else if (s->band_centre[i] < 5000) {
+            s->band_centre[i] = (50 * ((s->band_centre[i] + 20) / 50));
+        } else if (s->band_centre[i] < 15000) {
+            s->band_centre[i] = (100 * ((s->band_centre[i] + 45) / 100));
+        } else {
+            s->band_centre[i] = (1000 * ((s->band_centre[i] + 495) / 1000));
+        }
+    }
+
+    switch (s->noise_type) {
+    case WHITE_NOISE:
+        for (i = 0; i < 15; i++)
+            s->band_noise[i] = 0;
+        break;
+    case VINYL_NOISE:
+        for (i = 0; i < 15; i++)
+            s->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0) + FFMAX(i - 7, 0);
+        break;
+    case SHELLAC_NOISE:
+        for (i = 0; i < 15; i++)
+            s->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10) + FFMAX(i - 12, -5);
+        break;
+    }
+
+    for (j = 0; j < 5; j++) {
+        for (k = 0; k < 5; k++) {
+            s->matrix_a[j + k * 5] = 0.0;
+            for (m = 0; m < 15; m++)
+                s->matrix_a[(j + k * 5)] += pow(m, j + k);
+        }
+    }
+
+    factor(s->matrix_a, 5);
+    i = 0;
+
+    for (j = 0; j < 5; j++)
+        for (k = 0; k < 15; k++)
+            s->matrix_b[i++] = pow(k, j);
+
+    i = 0;
+    for (j = 0; j < 15; j++)
+        for (k = 0; k < 5; k++)
+            s->matrix_c[i++] = pow(j, k);
+
+    s->window = av_calloc(s->window_length, sizeof(*s->window));
+    s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
+    s->abs_var = av_calloc(s->bin_count, sizeof(*s->abs_var));
+    s->rel_var = av_calloc(s->bin_count, sizeof(*s->rel_var));
+    s->min_abs_var = av_calloc(s->bin_count, sizeof(*s->min_abs_var));
+    if (!s->window || !s->bin2band || !s->abs_var || !s->rel_var || !s->min_abs_var)
+        return AVERROR(ENOMEM);
+
+    sdiv = s->sample_rate / 17640.0;
+    for (i = 0; i <= s->fft_length2; i++)
+        s->bin2band[i] = lrintf(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
+
+    s->number_of_bands = s->bin2band[s->fft_length2] + 1;
+
+    s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
+    s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
+
+    for (int ch = 0; ch < inlink->channels; ch++) {
+        DenoiseChannel *dnch = &s->dnch[ch];
+
+        dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
+        dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
+        dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
+        dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
+        dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
+        dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
+        dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
+        dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
+        dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
+        dnch->fft_data = av_calloc(s->fft_length2, sizeof(*dnch->fft_data));
+        dnch->fft  = av_fft_init(av_log2(s->fft_length2), 0);
+        dnch->ifft = av_fft_init(av_log2(s->fft_length2), 1);
+        dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
+                                          sizeof(*dnch->spread_function));
+
+        if (!dnch->amt ||
+            !dnch->band_amt ||
+            !dnch->band_excit ||
+            !dnch->gain ||
+            !dnch->prior ||
+            !dnch->prior_band_excit ||
+            !dnch->clean_data ||
+            !dnch->noisy_data ||
+            !dnch->out_samples ||
+            !dnch->fft_data ||
+            !dnch->spread_function ||
+            !dnch->fft ||
+            !dnch->ifft)
+            return AVERROR(ENOMEM);
+    }
+
+    for (int ch = 0; ch < inlink->channels; ch++) {
+        DenoiseChannel *dnch = &s->dnch[ch];
+        double *prior_band_excit = dnch->prior_band_excit;
+        double *prior = dnch->prior;
+
+        d2 = pow(0.1, 2.5 / sdiv);
+        d3 = pow(0.1, 1.0 / sdiv);
+        j = 0;
+        for (m = 0; m < s->number_of_bands; m++) {
+            for (n = 0; n < s->number_of_bands; n++) {
+                if (n < m) {
+                    dnch->spread_function[j++] = pow(d3, m - n);
+                } else if (n > m) {
+                    dnch->spread_function[j++] = pow(d2, n - m);
+                } else {
+                    dnch->spread_function[j++] = 1.0;
+                }
+            }
+        }
+
+        for (m = 0; m < s->number_of_bands; m++) {
+            dnch->band_excit[m] = 0.0;
+            prior_band_excit[m] = 0.0;
+        }
+
+        for (m = 0; m <= s->fft_length2; m++)
+            dnch->band_excit[s->bin2band[m]] += 1.0;
+
+        j = 0;
+        for (m = 0; m < s->number_of_bands; m++) {
+            for (n = 0; n < s->number_of_bands; n++)
+                prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
+        }
+
+        min = pow(0.1, 2.5);
+        max = pow(0.1, 1.0);
+        for (int i = 0; i < s->number_of_bands; i++) {
+            if (i < (int)(12.0 * sdiv)) {
+                dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
+            } else {
+                dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
+            }
+            dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
+        }
+
+        for (int i = 0; i <= s->fft_length2; i++)
+            prior[i] = 0.02;
+        for (int i = 0; i < s->buffer_length; i++)
+            dnch->out_samples[i] = 0;
+
+        j = 0;
+        for (int i = 0; i < s->number_of_bands; i++)
+            for (int k = 0; k < s->number_of_bands; k++)
+                dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
+    }
+
+    j = 0;
+    d8 = s->sample_advance / s->sample_rate;
+    for (int i = 0; i <= s->fft_length2; i++) {
+        if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
+            double d6 = (i - 1) * s->sample_rate / s->fft_length;
+            double d7 = fmin(0.008 + 2.2 / d6, 0.03);
+            s->band_alpha[j] = exp(-d8 / d7);
+            s->band_beta[j] = 1.0 - s->band_alpha[j];
+            j = s->bin2band[i];
+        }
+    }
+
+    d9 = sqrt(16.0 / (9.0 * s->fft_length));
+    sum = 0.0;
+    for (int i = 0; i < s->window_length; i++) {
+        double d10 = sin(i * M_PI / s->window_length);
+        d10 *= d9 * d10;
+        s->window[i] = d10;
+        sum += d10 * d10;
+    }
+
+    s->sfm_threshold = 0.8;
+    s->window_weight = 0.5 * sum;
+    s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
+    s->sample_floor = s->floor * exp(4.144600506562284);
+    s->auto_floor = s->floor * exp(6.907667510937141);
+    s->sfm_alpha = 0.05;
+
+    for (i = 0; i < 512; i++)
+        s->sfm_fail_flags[i] = 0;
+
+    s->sfm_fail_total = 0;
+    j = FFMAX((int)(10.0 * (1.3 - s->sfm_threshold)), 1);
+
+    for (i = 0; i < 512; i += j) {
+        s->sfm_fail_flags[i] = 1;
+        s->sfm_fail_total += 1;
+    }
+
+    d2 = 0.0;
+    i = 0;
+    j = 0;
+    k = 0;
+    d5 = 0.0;
+    d6 = process_get_band_noise(s, 0);
+    for (int m = j; m <= s->fft_length2; m++) {
+        if (m == j) {
+            i = j;
+            d5 = d6;
+            if (k == 15) {
+                j = s->bin_count;
+            } else {
+                j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
+            }
+            d2 = j - i;
+            d6 = process_get_band_noise(s, k);
+            k++;
+        }
+        d3 = (j - m) / d2;
+        d4 = (m - i) / d2;
+        s->rel_var[m] = exp((d5 * d3 + d6 * d4) * C);
+    }
+    s->rel_var[s->fft_length2] = exp(d6 * C);
+
+    if (s->last_noise_floor != s->noise_floor) {
+        s->last_noise_floor = s->noise_floor;
+    }
+
+    if (s->track_residual)
+        s->last_noise_floor = fmax(s->last_noise_floor, s->residual_floor);
+
+    d2 = 0.5 * (2 * s->last_noise_floor + s->noise_balance);
+    d1 = s->floor * exp((140.0 - d2) * C);
+
+    for (i = 0; i < 15; i++)
+        s->noise_band_auto_var[i] = d1 * exp((process_get_band_noise(s, i) - 2.0) * C);
+
+    if (s->track_residual) {
+        s->last_residual_floor = s->residual_floor;
+        s->last_noise_reduction = fmax(d2 - s->last_residual_floor, 0);
+        s->max_gain = exp(s->last_noise_reduction * (0.5 * C));
+    } else if (s->noise_reduction != s->last_noise_reduction) {
+        s->last_noise_reduction = s->noise_reduction;
+        s->last_residual_floor = av_clipf(d2 - s->last_noise_reduction, 20, 80);
+
+        s->max_gain = exp(s->last_noise_reduction * (0.5 * C));
+    }
+
+    d3 = 1.0 / (s->max_gain * s->max_gain);
+    for (i = 0; i <= s->fft_length2; i++) {
+        s->abs_var[i] = fmax(d1 * s->rel_var[i], 1.0);
+        s->min_abs_var[i] = d3 * s->abs_var[i];
+    }
+
+    s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
+    i = 0;
+    for (int j = 1; j < 16; j++) {
+        s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
+        if (s->noise_band_edge[j] > (int)(1.1 * s->noise_band_edge[(j - 1)])) {
+            i++;
+        }
+        s->noise_band_edge[16] = i;
+    }
+    s->noise_band_count = s->noise_band_edge[16];
+
+    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->fft_length);
+    if (!s->fifo)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void preprocess(FFTComplex *in, int len)
+{
+    double d1, d2, d3, d4, d5, d6, d7, d8, d9, d10;
+    int n, i, k;
+
+    d5 = 2.0 * M_PI / len;
+    d8 = sin(0.5 * d5);
+    d8 = -2.0 * d8 * d8;
+    d7 = sin(d5);
+    d9 = 1.0 + d8;
+    d6 = d7;
+    n = len / 2;
+
+    for (i = 1; i < len / 4; i++) {
+        k = n - i;
+        d2 = 0.5 * (in[i].re + in[k].re);
+        d1 = 0.5 * (in[i].im - in[k].im);
+        d4 = 0.5 * (in[i].im + in[k].im);
+        d3 = 0.5 * (in[k].re - in[i].re);
+        in[i].re = d2 + d9 * d4 + d6 * d3;
+        in[i].im = d1 + d9 * d3 - d6 * d4;
+        in[k].re = d2 - d9 * d4 - d6 * d3;
+        in[k].im = -d1 + d9 * d3 - d6 * d4;
+        d10 = d9;
+        d9 += d9 * d8 - d6 * d7;
+        d6 += d6 * d8 + d10 * d7;
+    }
+
+    d2 = in[0].re;
+    in[0].re = d2 + in[0].im;
+    in[0].im = d2 - in[0].im;
+}
+
+static void postprocess(FFTComplex *in, int len)
+{
+    double d1, d2, d3, d4, d5, d6, d7, d8, d9, d10;
+    int n, i, k;
+
+    d5 = 2.0 * M_PI / len;
+    d8 = sin(0.5 * d5);
+    d8 = -2.0 * d8 * d8;
+    d7 = sin(d5);
+    d9 = 1.0 + d8;
+    d6 = d7;
+    n = len / 2;
+    for (i = 1; i < len / 4; i++) {
+        k = n - i;
+        d2 = 0.5 * (in[i].re + in[k].re);
+        d1 = 0.5 * (in[i].im - in[k].im);
+        d4 = 0.5 * (in[i].re - in[k].re);
+        d3 = 0.5 * (in[i].im + in[k].im);
+        in[i].re = d2 - d9 * d3 - d6 * d4;
+        in[i].im = d1 + d9 * d4 - d6 * d3;
+        in[k].re = d2 + d9 * d3 + d6 * d4;
+        in[k].im = -d1 + d9 * d4 - d6 * d3;
+        d10 = d9;
+        d9 += d9 * d8 - d6 * d7;
+        d6 += d6 * d8 + d10 * d7;
+    }
+    d2 = in[0].re;
+    in[0].re = 0.5 * (d2 + in[0].im);
+    in[0].im = 0.5 * (d2 - in[0].im);
+}
+
+typedef struct ThreadData {
+    AVFrame *in;
+} ThreadData;
+
+static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    AudioFFTDenoiseContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    const int start = (in->channels * jobnr) / nb_jobs;
+    const int end = (in->channels * (jobnr+1)) / nb_jobs;
+
+    for (int ch = start; ch < end; ch++) {
+        DenoiseChannel *dnch = &s->dnch[ch];
+        const float *src = (const float *)in->extended_data[ch];
+        double *dst = dnch->out_samples;
+
+        if (s->track_noise) {
+            int i = s->block_count & 0x1FF;
+
+            if (s->sfm_fail_flags[i])
+                s->sfm_fail_total--;
+            s->sfm_fail_flags[i] = 0;
+            s->sfm_threshold *= 1.0 - s->sfm_alpha;
+            s->sfm_threshold += s->sfm_alpha * (0.5 + 0.0015625 * s->sfm_fail_total);
+        }
+
+        for (int m = 0; m < s->window_length; m++) {
+            dnch->fft_data[m].re = s->window[m] * src[m] * (1LL << 32);
+            dnch->fft_data[m].im = 0;
+        }
+
+        for (int m = s->window_length; m < s->fft_length2; m++) {
+            dnch->fft_data[m].re = 0;
+            dnch->fft_data[m].im = 0;
+        }
+
+        av_fft_permute(dnch->fft, dnch->fft_data);
+        av_fft_calc(dnch->fft, dnch->fft_data);
+
+        preprocess(dnch->fft_data, s->fft_length);
+        process_frame(s, dnch, dnch->fft_data,
+                      dnch->prior,
+                      dnch->prior_band_excit,
+                      s->track_noise);
+        postprocess(dnch->fft_data, s->fft_length);
+
+        av_fft_permute(dnch->ifft, dnch->fft_data);
+        av_fft_calc(dnch->ifft, dnch->fft_data);
+
+        for (int m = 0; m < s->window_length; m++)
+            dst[m] += s->window[m] * dnch->fft_data[m].re / (1LL << 32);
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioFFTDenoiseContext *s = ctx->priv;
+    AVFrame *out = NULL, *in = NULL;
+    ThreadData td;
+    int ch, ret = 0;
+
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = frame->pts;
+
+    ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
+    av_frame_free(&frame);
+    if (ret < 0)
+        return ret;
+
+    while (av_audio_fifo_size(s->fifo) >= s->window_length) {
+        if (!in) {
+            in = ff_get_audio_buffer(outlink, s->window_length);
+            if (!in)
+                return AVERROR(ENOMEM);
+        }
+
+        ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, s->window_length);
+        if (ret < 0)
+            break;
+
+        s->block_count++;
+        td.in = in;
+        ctx->internal->execute(ctx, filter_channel, &td, NULL,
+                               FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));
+
+        out = ff_get_audio_buffer(outlink, s->sample_advance);
+        if (!out) {
+            ret = AVERROR(ENOMEM);
+            break;
+        }
+
+        for (ch = 0; ch < inlink->channels; ch++) {
+            DenoiseChannel *dnch = &s->dnch[ch];
+            double *src = dnch->out_samples;
+            float *dst = (float *)out->extended_data[ch];
+
+            for (int m = 0; m < s->sample_advance; m++)
+                dst[m] = src[m];
+            memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
+            memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
+        }
+
+        av_audio_fifo_drain(s->fifo, s->sample_advance);
+
+        out->pts = s->pts;
+        ret = ff_filter_frame(outlink, out);
+        if (ret < 0)
+            break;
+        s->pts += s->sample_advance;
+    }
+    av_frame_free(&in);
+
+    return ret;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioFFTDenoiseContext *s = ctx->priv;
+
+    av_freep(&s->window);
+    av_freep(&s->bin2band);
+    av_freep(&s->abs_var);
+    av_freep(&s->rel_var);
+    av_freep(&s->min_abs_var);
+    av_freep(&s->band_alpha);
+    av_freep(&s->band_beta);
+
+    if (s->dnch) {
+        for (int ch = 0; ch < s->channels; ch++) {
+            DenoiseChannel *dnch = &s->dnch[ch];
+            av_freep(&dnch->amt);
+            av_freep(&dnch->band_amt);
+            av_freep(&dnch->band_excit);
+            av_freep(&dnch->gain);
+            av_freep(&dnch->prior);
+            av_freep(&dnch->prior_band_excit);
+            av_freep(&dnch->clean_data);
+            av_freep(&dnch->noisy_data);
+            av_freep(&dnch->out_samples);
+            av_freep(&dnch->spread_function);
+            av_freep(&dnch->fft_data);
+            av_fft_end(dnch->fft);
+            dnch->fft = NULL;
+            av_fft_end(dnch->ifft);
+            dnch->ifft = NULL;
+        }
+        av_freep(&s->dnch);
+    }
+
+    av_audio_fifo_free(s->fifo);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_afftdn = {
+    .name           = "afftdn",
+    .description    = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
+    .query_formats  = query_formats,
+    .priv_size      = sizeof(AudioFFTDenoiseContext),
+    .priv_class     = &afftdn_class,
+    .uninit         = uninit,
+    .inputs         = inputs,
+    .outputs        = outputs,
+    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 8b1c0d618c..5e72803b13 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -39,6 +39,7 @@  extern AVFilter ff_af_aecho;
 extern AVFilter ff_af_aemphasis;
 extern AVFilter ff_af_aeval;
 extern AVFilter ff_af_afade;
+extern AVFilter ff_af_afftdn;
 extern AVFilter ff_af_afftfilt;
 extern AVFilter ff_af_afir;
 extern AVFilter ff_af_aformat;