[FFmpeg-devel] Adding libspeexdsp's AEC to FFmpeg.

Submitted by Arseniy Skvortsov on Sept. 4, 2018, 7:35 a.m.

Details

Message ID op.zos3d1t56r7ij4@ptits-lt
State New
Headers show

Commit Message

Arseniy Skvortsov Sept. 4, 2018, 7:35 a.m.
Arseniy Skvortsov <ettavolt@gmail.com> писал(а) в своём письме Tue, 04 Sep  
2018 10:09:22 +0300:

> I'm trying to add AEC from libspeexdsp to libavfilter.


Sorry, here are the same files with proper mimetypes.
LD_LIBRARY_PATH=./ ./ffmpeg -loglevel 60 -f alsa -ac 1 -i 'front:CARD=C615,DEV=0' -filter_complex '[0:0] [d] libspeex_aec, asplit [acpl] [pl]; aevalsrc=s=48000:d=2134:c=mono:exprs=0, aformat=sample_fmts=s16p [fd]; [fd] [acpl] concat=v=0:a=1 [d];[pl] aformat=channel_layouts=mono [plr]; anullsrc=mono:48000 [i2];[i2] [plr] amerge=2 [c2]' -map '[c2]' -f alsa 'front:CARD=PCH,DEV=0' >log.txt 2>&1
ffmpeg version N-91595-g390d7ea2a3 Copyright (c) 2000-2018 the FFmpeg developers
  built with gcc 8.2.0 (GCC)
  configuration: --prefix=/usr --disable-debug --disable-static --disable-stripping --disable-everything --enable-gpl --enable-encoder=opus --enable-encoder=png --enable-encoder=pcm_s16le --enable-encoder=libx264 --enable-decoder=opus --enable-decoder=png --enable-decoder=h264 --enable-decoder=mjpeg --enable-decoder=pcm_s16le --enable-decoder=aac --enable-muxer=matroska --enable-muxer=mov --enable-muxer=rtp --enable-muxer=rtsp --enable-demuxer=matroska --enable-demuxer=sdp --enable-demuxer=rtp --enable-demuxer=rtsp --enable-parser=opus --enable-parser=h264 --enable-parser=png --enable-protocol=udp --enable-protocol=tcp --enable-protocol=rtp --enable-protocol=file --enable-protocol=http --enable-indev=alsa --enable-indev=v4l2 --enable-outdev=alsa --enable-outdev=fbdev --enable-outdev=sdl2 --enable-filter=scale --enable-filter=adelay --enable-filter=afifo --enable-filter=aresample --enable-filter=asplit --enable-filter=resample --enable-filter=anullsrc --enable-filter=aevalsrc --enable-filter=amix --enable-filter=amerge --enable-filter=channelsplit --enable-filter=volume --enable-filter=volumedetect --enable-filter=crop --enable-filter=eq --enable-filter=format --enable-filter=lenscorrection --enable-filter=libspeex_aec --enable-filter=vignette --enable-filter=perspective --enable-filter=concat --enable-filter=split --enable-libdrm --enable-libopus --enable-libpulse --enable-libspeexdsp --enable-libv4l2 --enable-libx264 --enable-libxml2 --enable-shared --enable-version3 --disable-ffprobe --disable-doc --enable-debug=2 --disable-optimizations
  libavutil      56. 19.100 / 56. 19.100
  libavcodec     58. 22.101 / 58. 22.101
  libavformat    58. 17.101 / 58. 17.101
  libavdevice    58.  4.101 / 58.  4.101
  libavfilter     7. 26.100 /  7. 26.100
  libswscale      5.  2.100 /  5.  2.100
  libswresample   3.  2.100 /  3.  2.100
  libpostproc    55.  2.100 / 55.  2.100
Splitting the commandline.
Reading option '-loglevel' ... matched as option 'loglevel' (set logging level) with argument '60'.
Reading option '-f' ... matched as option 'f' (force format) with argument 'alsa'.
Reading option '-ac' ... matched as option 'ac' (set number of audio channels) with argument '1'.
Reading option '-i' ... matched as input url with argument 'front:CARD=C615,DEV=0'.
Reading option '-filter_complex' ... matched as option 'filter_complex' (create a complex filtergraph) with argument '[0:0] [d] libspeex_aec, asplit [acpl] [pl]; aevalsrc=s=48000:d=2134:c=mono:exprs=0, aformat=sample_fmts=s16p [fd]; [fd] [acpl] concat=v=0:a=1 [d];[pl] aformat=channel_layouts=mono [plr]; anullsrc=mono:48000 [i2];[i2] [plr] amerge=2 [c2]'.
Reading option '-map' ... matched as option 'map' (set input stream mapping) with argument '[c2]'.
Reading option '-f' ... matched as option 'f' (force format) with argument 'alsa'.
Reading option 'front:CARD=PCH,DEV=0' ... matched as output url.
Finished splitting the commandline.
Parsing a group of options: global .
Applying option loglevel (set logging level) with argument 60.
Applying option filter_complex (create a complex filtergraph) with argument [0:0] [d] libspeex_aec, asplit [acpl] [pl]; aevalsrc=s=48000:d=2134:c=mono:exprs=0, aformat=sample_fmts=s16p [fd]; [fd] [acpl] concat=v=0:a=1 [d];[pl] aformat=channel_layouts=mono [plr]; anullsrc=mono:48000 [i2];[i2] [plr] amerge=2 [c2].
Successfully parsed a group of options.
Parsing a group of options: input url front:CARD=C615,DEV=0.
Applying option f (force format) with argument alsa.
Applying option ac (set number of audio channels) with argument 1.
Successfully parsed a group of options.
Opening an input file: front:CARD=C615,DEV=0.
[alsa @ 0x56554d3328c0] All info found
[alsa @ 0x56554d3328c0] stream 0: start_time: 1536038679.812 duration: -9223372036854.775
[alsa @ 0x56554d3328c0] format: start_time: 1536038679.812 duration: -9223372036854.775 bitrate=768 kb/s
Guessed Channel Layout for Input Stream #0.0 : mono
Input #0, alsa, from 'front:CARD=C615,DEV=0':
  Duration: N/A, start: 1536038679.812124, bitrate: 768 kb/s
    Stream #0:0, 1, 1/1000000: Audio: pcm_s16le, 48000 Hz, mono, s16, 768 kb/s
Successfully opened the file.
[Parsed_aevalsrc_2 @ 0x56554d383c40] Setting 's' to value '48000'
[Parsed_aevalsrc_2 @ 0x56554d383c40] Setting 'd' to value '2134'
[Parsed_aevalsrc_2 @ 0x56554d383c40] Setting 'c' to value 'mono'
[Parsed_aevalsrc_2 @ 0x56554d383c40] Setting 'exprs' to value '0'
[Parsed_aformat_3 @ 0x56554d384ac0] Setting 'sample_fmts' to value 's16p'
[Parsed_concat_4 @ 0x56554d3859c0] Setting 'v' to value '0'
[Parsed_concat_4 @ 0x56554d3859c0] Setting 'a' to value '1'
[Parsed_aformat_5 @ 0x56554d386300] Setting 'channel_layouts' to value 'mono'
[Parsed_anullsrc_6 @ 0x56554d387f40] Setting 'channel_layout' to value 'mono'
[Parsed_anullsrc_6 @ 0x56554d387f40] Setting 'sample_rate' to value '48000'
[Parsed_amerge_7 @ 0x56554d388880] Setting 'inputs' to value '2'
Parsing a group of options: output url front:CARD=PCH,DEV=0.
Applying option map (set input stream mapping) with argument [c2].
Applying option f (force format) with argument alsa.
Successfully parsed a group of options.
Opening an output file: front:CARD=PCH,DEV=0.
Successfully opened the file.
Stream mapping:
  Stream #0:0 (pcm_s16le) -> libspeex_aec:record
  amerge -> Stream #0:0 (pcm_s16le)
Press [q] to stop, [?] for help
cur_dts is invalid (this is harmless if it occurs once at the start per stream)
detected 4 logical cores
[Parsed_aevalsrc_2 @ 0x56554d38c800] Setting 's' to value '48000'
[Parsed_aevalsrc_2 @ 0x56554d38c800] Setting 'd' to value '2134'
[Parsed_aevalsrc_2 @ 0x56554d38c800] Setting 'c' to value 'mono'
[Parsed_aevalsrc_2 @ 0x56554d38c800] Setting 'exprs' to value '0'
[Parsed_aformat_3 @ 0x56554d38d680] Setting 'sample_fmts' to value 's16p'
[Parsed_concat_4 @ 0x56554d386080] Setting 'v' to value '0'
[Parsed_concat_4 @ 0x56554d386080] Setting 'a' to value '1'
[Parsed_aformat_5 @ 0x56554d38b940] Setting 'channel_layouts' to value 'mono'
[Parsed_anullsrc_6 @ 0x56554d389900] Setting 'channel_layout' to value 'mono'
[Parsed_anullsrc_6 @ 0x56554d389900] Setting 'sample_rate' to value '48000'
[Parsed_amerge_7 @ 0x56554d38c400] Setting 'inputs' to value '2'
[graph_0_in_0_0 @ 0x56554d38ecc0] Setting 'time_base' to value '1/48000'
[graph_0_in_0_0 @ 0x56554d38ecc0] Setting 'sample_rate' to value '48000'
[graph_0_in_0_0 @ 0x56554d38ecc0] Setting 'sample_fmt' to value 's16'
[graph_0_in_0_0 @ 0x56554d38ecc0] Setting 'channel_layout' to value '0x4'
[graph_0_in_0_0 @ 0x56554d38ecc0] tb:1/48000 samplefmt:s16 samplerate:48000 chlayout:0x4
[format_out_0_0 @ 0x56554d38d580] Setting 'sample_fmts' to value 's16'
[Parsed_libspeex_aec_0 @ 0x56554d38b480] auto-inserting filter 'auto_fifo_0' between the filter 'graph_0_in_0_0' and the filter 'Parsed_libspeex_aec_0'
[Parsed_libspeex_aec_0 @ 0x56554d38b480] auto-inserting filter 'auto_fifo_1' between the filter 'Parsed_concat_4' and the filter 'Parsed_libspeex_aec_0'
[Parsed_amerge_7 @ 0x56554d38c400] Input channel layouts overlap: output layout will be determined by the number of distinct input channels
[Parsed_aformat_3 @ 0x56554d38d680] auto-inserting filter 'auto_resampler_0' between the filter 'Parsed_aevalsrc_2' and the filter 'Parsed_aformat_3'
[Parsed_amerge_7 @ 0x56554d38c400] auto-inserting filter 'auto_resampler_1' between the filter 'Parsed_aformat_5' and the filter 'Parsed_amerge_7'
[auto_fifo_0 @ 0x56554d384ec0] auto-inserting filter 'auto_resampler_2' between the filter 'graph_0_in_0_0' and the filter 'auto_fifo_0'
[AVFilterGraph @ 0x56554d38a600] query_formats: 13 queried, 28 merged, 11 already done, 0 delayed
[Parsed_anullsrc_6 @ 0x56554d389900] sample_rate:48000 channel_layout:'mono' nb_samples:1024
[auto_resampler_2 @ 0x56554d392cc0] [SWR @ 0x56554d3bf600] Using s16p internally between filters
[auto_resampler_2 @ 0x56554d392cc0] ch:1 chl:mono fmt:s16 r:48000Hz -> ch:1 chl:mono fmt:s16p r:48000Hz
[Parsed_aevalsrc_2 @ 0x56554d38c800] sample_rate:48000 chlayout:mono duration:2134000000
[auto_resampler_0 @ 0x56554d38d380] [SWR @ 0x56554d394e80] Using dblp internally between filters
[auto_resampler_0 @ 0x56554d38d380] ch:1 chl:mono fmt:dblp r:48000Hz -> ch:1 chl:mono fmt:s16p r:48000Hz
[Parsed_asplit_1 @ 0x56554d38bb80] circular filter chain detected
[Parsed_libspeex_aec_0 @ 0x56554d38b480] mics:1 speakers:1
[auto_resampler_1 @ 0x56554d3aa1c0] [SWR @ 0x56554d3aa640] Using s16p internally between filters
[auto_resampler_1 @ 0x56554d3aa1c0] ch:1 chl:mono fmt:s16p r:48000Hz -> ch:1 chl:mono fmt:s16 r:48000Hz
[Parsed_amerge_7 @ 0x56554d38c400] in0:mono + in1:mono -> out:stereo
Output #0, alsa, to 'front:CARD=PCH,DEV=0':
  Metadata:
    encoder         : Lavf58.17.101
    Stream #0:0, 0, 1/48000: Audio: pcm_s16le, 48000 Hz, stereo, s16, 1536 kb/s
    Metadata:
      encoder         : Lavc58.22.101 pcm_s16le
cur_dts is invalid (this is harmless if it occurs once at the start per stream)
    Last message repeated 12 times
[alsa @ 0x56554d388bc0] ALSA buffer xrun.
    Last message repeated 21 times
size=N/A time=00:00:00.49 bitrate=N/A speed=0.972x    
[alsa @ 0x56554d388bc0] ALSA buffer xrun.
    Last message repeated 22 times
size=N/A time=00:00:00.98 bitrate=N/A speed=0.976x    
[alsa @ 0x56554d388bc0] ALSA buffer xrun.
    Last message repeated 17 times
size=N/A time=00:00:01.36 bitrate=N/A speed=0.939x    
video:0kB audio:256kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
Input file #0 (front:CARD=C615,DEV=0):
  Input stream #0:0 (audio): 604 packets read (132576 bytes); 604 frames decoded (66288 samples); 
  Total: 604 packets (132576 bytes) demuxed
Output file #0 (front:CARD=PCH,DEV=0):
  Output stream #0:0 (audio): 64 frames encoded (65536 samples); 64 packets muxed (262144 bytes); 
  Total: 64 packets (262144 bytes) muxed
604 frames successfully decoded, 0 decoding errors
Exiting normally, received signal 2.

Patch hide | download patch | download mbox

From 88fd203d3b842f7db2cda34b72452a1ced711ed3 Mon Sep 17 00:00:00 2001
From: Ptits de Barbe <ettavolt@rambler.ru>
Date: Sun, 26 Aug 2018 11:45:28 +0300
Subject: [PATCH] AEC filter from Speex.

---
 configure                     |   4 +
 libavfilter/Makefile          |   1 +
 libavfilter/af_libspeex_aec.c | 293 ++++++++++++++++++++++++++++++++++
 libavfilter/allfilters.c      |   1 +
 4 files changed, 299 insertions(+)
 create mode 100644 libavfilter/af_libspeex_aec.c

diff --git a/configure b/configure
index e718c1531c..45c8cba724 100755
--- a/configure
+++ b/configure
@@ -259,6 +259,7 @@  External library support:
   --enable-libsnappy       enable Snappy compression, needed for hap encoding [no]
   --enable-libsoxr         enable Include libsoxr resampling [no]
   --enable-libspeex        enable Speex de/encoding via libspeex [no]
+  --enable-libspeexdsp     enable Speex acoustic echo cancellation via libspeexdsp [no]
   --enable-libsrt          enable Haivision SRT protocol via libsrt [no]
   --enable-libssh          enable SFTP protocol via libssh [no]
   --enable-libtensorflow   enable TensorFlow as a DNN module backend
@@ -1717,6 +1718,7 @@  EXTERNAL_LIBRARY_LIST="
     libsnappy
     libsoxr
     libspeex
+    libspeexdsp
     libsrt
     libssh
     libtensorflow
@@ -3086,6 +3088,7 @@  libopus_encoder_select="audio_frame_queue"
 librsvg_decoder_deps="librsvg"
 libshine_encoder_deps="libshine"
 libshine_encoder_select="audio_frame_queue"
+libspeex_aec_filter_deps="libspeexdsp"
 libspeex_decoder_deps="libspeex"
 libspeex_encoder_deps="libspeex"
 libspeex_encoder_select="audio_frame_queue"
@@ -6079,6 +6082,7 @@  enabled libsnappy         && require libsnappy snappy-c.h snappy_compress -lsnap
 enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr
 enabled libssh            && require_pkg_config libssh libssh libssh/sftp.h sftp_init
 enabled libspeex          && require_pkg_config libspeex speex speex/speex.h speex_decoder_init
+enabled libspeexdsp       && require_pkg_config libspeexdsp speexdsp speex/speex_echo.h speex_echo_state_init_mc
 enabled libsrt            && require_pkg_config libsrt "srt >= 1.2.0" srt/srt.h srt_socket
 enabled libtensorflow     && require libtensorflow tensorflow/c/c_api.h TF_Version -ltensorflow
 enabled libtesseract      && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 245302bbe8..1c5502f9a6 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -108,6 +108,7 @@  OBJS-$(CONFIG_HIGHPASS_FILTER)               += af_biquads.o
 OBJS-$(CONFIG_HIGHSHELF_FILTER)              += af_biquads.o
 OBJS-$(CONFIG_JOIN_FILTER)                   += af_join.o
 OBJS-$(CONFIG_LADSPA_FILTER)                 += af_ladspa.o
+OBJS-$(CONFIG_LIBSPEEX_AEC_FILTER)           += af_libspeex_aec.o
 OBJS-$(CONFIG_LOUDNORM_FILTER)               += af_loudnorm.o ebur128.o
 OBJS-$(CONFIG_LOWPASS_FILTER)                += af_biquads.o
 OBJS-$(CONFIG_LOWSHELF_FILTER)               += af_biquads.o
diff --git a/libavfilter/af_libspeex_aec.c b/libavfilter/af_libspeex_aec.c
new file mode 100644
index 0000000000..eb96f4e533
--- /dev/null
+++ b/libavfilter/af_libspeex_aec.c
@@ -0,0 +1,293 @@ 
+/*
+ * Speex-based Acoustic Echo Canceller.
+ * Copyright (c) 2018 Arseniy Skvortsov <ettavolt@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Speex-based Acoustic Echo Canceller
+ *
+ * Removes the echo of the second input ("playback") from the first input
+ * ("record"), as if the first had been recorded while the second was played.
+ * The inputs are assumed to be time-aligned; no delay compensation is done.
+ */
+
+#include <speex/speex_echo.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/opt.h"
+#include "libavutil/samplefmt.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "internal.h"
+
+typedef struct AECContext {
+    const AVClass *class;       /**< AVClass pointer, used for the AVOptions below */
+
+    int frame_size;             /**< "frame_size" option: number of samples processed at one time */
+    int filter_length;          /**< "filter_length" option: number of samples of echo to cancel */
+    int nb_mics;                /**< channel count of the "record" input, set in config_record_input() */
+    int nb_speakers;            /**< channel count of the "playback" input, set in config_playback_input() */
+
+    SpeexEchoState *state;      /**< Speex echo canceller state: made in config_state(), freed in uninit() */
+} AECContext;
+
+#define OFFSET(x) offsetof(AECContext, x) /* byte offset of an option's field inside AECContext */
+#define A AV_OPT_FLAG_AUDIO_PARAM         /* A and F: short names for the flags set on both options */
+#define F AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption libspeex_aec_options[] = {
+    { "frame_size", "Number of samples to process at one time (should correspond to 20 ms, preferably 2ⁿ for FFT)",
+            OFFSET(frame_size), AV_OPT_TYPE_INT, { .i64 = 1024 }, 1, INT_MAX, A|F }, /* default 1024, range 1..INT_MAX */
+    { "filter_length", "Number of samples of echo to cancel "
+                       "(should generally correspond to 100-500 ms or a ⅓ of room reverberation time)",
+            OFFSET(filter_length), AV_OPT_TYPE_INT, { .i64 = 5000 }, 1, INT_MAX, A|F }, /* default 5000, range 1..INT_MAX */
+    { NULL } /* end of the option list */
+};
+/* Defines the AVClass for the options above; the filter definition refers to it as libspeex_aec_class. */
+AVFILTER_DEFINE_CLASS(libspeex_aec);
+
+/**
+ * Negotiate the formats this filter accepts and produces.
+ *
+ * - Sample format: interleaved signed 16-bit on every link. Speex reads and
+ *   writes multi-channel audio interleaved in a single buffer, and
+ *   filter_frames() hands it the first data plane of each frame only. With a
+ *   planar format that plane holds one channel, so anything but mono would
+ *   make Speex run past the end of it.
+ * - Channel counts: the "playback" input gets a list of its own so that its
+ *   channel count is negotiated separately; the "record" input and the
+ *   output share a second list.
+ * - Sample rate: any, but the same on all links.
+ *
+ * Each list is handed to the formats helpers right after it is created and
+ * is never freed here: by then it may already be referenced by a link, or
+ * have been released by the helper that failed, so a second free in this
+ * function could leave dangling link references or free memory twice.
+ *
+ * @param ctx filter instance whose links are being negotiated
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *speakers_layouts;
+    AVFilterChannelLayouts *mics_layouts;
+    AVFilterFormats *sample_rates;
+    int ret;
+
+    /* Packed rather than planar samples: Speex wants interleaved channels. */
+    ret = ff_add_format(&formats, AV_SAMPLE_FMT_S16);
+    if (ret < 0)
+        return ret;
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+    /* Own list for the playback input. This has to precede the common call
+     * below, which is relied upon to leave an already set link alone. */
+    speakers_layouts = ff_all_channel_counts();
+    if (!speakers_layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_channel_layouts_ref(speakers_layouts, &ctx->inputs[1]->out_channel_layouts);
+    if (ret < 0)
+        return ret;
+
+    /* One list shared by the record input and the output. */
+    mics_layouts = ff_all_channel_counts();
+    if (!mics_layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, mics_layouts);
+    if (ret < 0)
+        return ret;
+
+    /* Any sample rate, as long as all links agree on it. */
+    sample_rates = ff_all_samplerates();
+    if (!sample_rates)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_samplerates(ctx, sample_rates);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+/* Creates the Speex state once both inputs are configured; returns 0 or a negative AVERROR code. */
+static int config_state(AVFilterContext *ctx)
+{
+    AECContext *self = ctx->priv;
+    int rate = ctx->inputs[0]->sample_rate; /* Speex assumes 8 kHz unless told; all links share one rate */
+    if (!self->nb_mics || !self->nb_speakers)
+        return 0; /* the other input has not reported its channel count yet */
+    self->state = speex_echo_state_init_mc(self->frame_size, self->filter_length, self->nb_mics, self->nb_speakers);
+    if (!self->state) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot initialize libspeex AEC state!\n");
+        return AVERROR(ENOMEM);
+    }
+    speex_echo_ctl(self->state, SPEEX_ECHO_SET_SAMPLING_RATE, &rate);
+    av_log(ctx, AV_LOG_VERBOSE, "mics:%d speakers:%d\n", self->nb_mics, self->nb_speakers);
+    return 0;
+}
+
+/* "record" pad setup: store the microphone channel count and set request_samples to frame_size. */
+static int config_record_input(AVFilterLink *inlink)
+{
+    AECContext *aec = inlink->dst->priv;
+    aec->nb_mics            = inlink->channels;
+    inlink->request_samples = aec->frame_size;
+    return config_state(inlink->dst);
+}
+
+/* "playback" pad setup: store the speaker channel count and set request_samples to frame_size. */
+static int config_playback_input(AVFilterLink *inlink)
+{
+    AECContext *aec = inlink->dst->priv;
+    aec->nb_speakers        = inlink->channels;
+    inlink->request_samples = aec->frame_size;
+    return config_state(inlink->dst);
+}
+
+/* Output pad setup: adopt the time base of the "record" input when none is set yet. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterLink *record = outlink->src->inputs[0];
+    int unset = outlink->time_base.num == 0 && outlink->time_base.den == 0;
+
+    if (record && unset)
+        outlink->time_base = record->time_base;
+
+    return 0;
+}
+
+/**
+ * Cancel echo on one frame pair: returns 1 when an input has no frame queued,
+ * otherwise ff_filter_frame()'s result or a negative AVERROR code. Speex always
+ * processes frame_size samples, so a pair of any other size (e.g. a short last
+ * frame) is not given to it; the "record" frame is then forwarded unchanged.
+ */
+static int filter_frames(const AVFilterContext *ctx) {
+    AVFilterLink *outlink = ctx->outputs[0];
+    AECContext *self = ctx->priv;
+    AVFrame *recorded, *played, *cleaned;
+    int ret;
+
+    if (!ff_inlink_check_available_frame(ctx->inputs[0]) ||
+        !ff_inlink_check_available_frame(ctx->inputs[1]))
+        return 1;
+    ret = ff_inlink_consume_frame(ctx->inputs[0], &recorded);
+    if (ret < 0)
+        return ret;
+    ret = ff_inlink_consume_frame(ctx->inputs[1], &played);
+    if (ret < 0) {
+        av_frame_free(&recorded);
+        return ret;
+    }
+    if (recorded->nb_samples != self->frame_size || played->nb_samples != self->frame_size) {
+        av_frame_free(&played);
+        return ff_filter_frame(outlink, recorded);
+    }
+    cleaned = ff_get_audio_buffer(outlink, self->frame_size);
+    ret = cleaned ? av_frame_copy_props(cleaned, recorded) : AVERROR(ENOMEM); /* keeps pts etc. */
+    if (ret >= 0)
+        speex_echo_cancellation(self->state, (const spx_int16_t *) recorded->extended_data[0],
+                                (const spx_int16_t *) played->extended_data[0],
+                                (spx_int16_t *) cleaned->extended_data[0]);
+    av_frame_free(&recorded);
+    av_frame_free(&played);
+    if (ret < 0) {
+        av_frame_free(&cleaned);
+        return ret;
+    }
+    return ff_filter_frame(outlink, cleaned);
+}
+
+/* Activation callback: process one queued frame pair, else forward status and request input. */
+static int activate(AVFilterContext *ctx)
+{
+    AVFilterLink *out = ctx->outputs[0];
+    int i, processed;
+
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(out, ctx);
+
+    processed = filter_frames(ctx);
+    if (processed <= 0)
+        return processed; /* 0: a frame pair was handled, <0: error */
+
+    /* Nothing was processed, so at least one input has no frame queued. */
+    for (i = 0; i < 2; i++) {
+        FF_FILTER_FORWARD_STATUS(ctx->inputs[i], out);
+    }
+    if (!ff_outlink_frame_wanted(out))
+        return FFERROR_NOT_READY;
+
+    /* Ask only the starved input(s) for more data. */
+    for (i = 0; i < 2; i++) {
+        if (!ff_inlink_check_available_frame(ctx->inputs[i]))
+            ff_inlink_request_frame(ctx->inputs[i]);
+    }
+    return 0;
+}
+
+/* Filter teardown: destroy the Speex echo state if one was created. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AECContext *aec = ctx->priv;
+    if (!aec->state)
+        return;
+    speex_echo_state_destroy(aec->state);
+}
+
+static const AVFilterPad avfilter_af_libspeex_aec_inputs[] = {
+    { /* Input 0: the recording that the echo is removed from. */
+        .name          = "record",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .needs_fifo    = 1,
+        .config_props  = config_record_input, /* stores nb_mics */
+    },
+    { /* Input 1: the played-back signal whose echo is removed from input 0. */
+        .name          = "playback",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .needs_fifo    = 1,
+        .config_props  = config_playback_input, /* stores nb_speakers */
+    },
+    { NULL } /* end of the pad list */
+};
+
+static const AVFilterPad avfilter_af_libspeex_aec_outputs[] = {
+    { /* The only output: the frames produced by filter_frames(). */
+        .name          = "cleaned",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_output, /* takes over the "record" input's time base if unset */
+    },
+    { NULL } /* end of the pad list */
+};
+
+AVFilter ff_af_libspeex_aec = { /* filter definition; declared extern in allfilters.c */
+    .name           = "libspeex_aec",
+    .description    = NULL_IF_CONFIG_SMALL("Speex-based acoustic echo cancellation (AEC)."),
+    .priv_size      = sizeof(AECContext),
+    .priv_class     = &libspeex_aec_class, /* from AVFILTER_DEFINE_CLASS(libspeex_aec) */
+    .uninit         = uninit,
+    .activate       = activate, /* all processing is driven from here; the pads set no per-frame callback */
+    .query_formats  = query_formats,
+    .inputs         = avfilter_af_libspeex_aec_inputs,
+    .outputs        = avfilter_af_libspeex_aec_outputs,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 2d19929bdc..3db1436c82 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -101,6 +101,7 @@  extern AVFilter ff_af_highpass;
 extern AVFilter ff_af_highshelf;
 extern AVFilter ff_af_join;
 extern AVFilter ff_af_ladspa;
+extern AVFilter ff_af_libspeex_aec;
 extern AVFilter ff_af_loudnorm;
 extern AVFilter ff_af_lowpass;
 extern AVFilter ff_af_lowshelf;
-- 
2.18.0