diff mbox

[FFmpeg-devel,5/9] sbc: implement SBC encoder (low-complexity subband codec)

Message ID 20180221223718.20789-6-aurel@gnuage.org
State Superseded
Headers show

Commit Message

Aurelien Jacobs Feb. 21, 2018, 10:37 p.m. UTC
This was originally based on libsbc, and was fully integrated into ffmpeg.
---
 doc/general.texi         |   2 +-
 libavcodec/Makefile      |   1 +
 libavcodec/allcodecs.c   |   1 +
 libavcodec/sbcdsp.c      | 382 +++++++++++++++++++++++++++++++++++++++++++
 libavcodec/sbcdsp.h      |  83 ++++++++++
 libavcodec/sbcdsp_data.c | 329 +++++++++++++++++++++++++++++++++++++
 libavcodec/sbcdsp_data.h |  55 +++++++
 libavcodec/sbcenc.c      | 411 +++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 1263 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/sbcdsp.c
 create mode 100644 libavcodec/sbcdsp.h
 create mode 100644 libavcodec/sbcdsp_data.c
 create mode 100644 libavcodec/sbcdsp_data.h
 create mode 100644 libavcodec/sbcenc.c

Comments

Rostislav Pehlivanov Feb. 22, 2018, 6:18 p.m. UTC | #1
On 21 February 2018 at 22:37, Aurelien Jacobs <aurel@gnuage.org> wrote:

> This was originally based on libsbc, and was fully integrated into ffmpeg.
> ---
>  doc/general.texi         |   2 +-
>  libavcodec/Makefile      |   1 +
>  libavcodec/allcodecs.c   |   1 +
>  libavcodec/sbcdsp.c      | 382 +++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/sbcdsp.h      |  83 ++++++++++
>  libavcodec/sbcdsp_data.c | 329 +++++++++++++++++++++++++++++++++++++
>  libavcodec/sbcdsp_data.h |  55 +++++++
>  libavcodec/sbcenc.c      | 411 +++++++++++++++++++++++++++++++++++++++++++++++
>  8 files changed, 1263 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/sbcdsp.c
>  create mode 100644 libavcodec/sbcdsp.h
>  create mode 100644 libavcodec/sbcdsp_data.c
>  create mode 100644 libavcodec/sbcdsp_data.h
>  create mode 100644 libavcodec/sbcenc.c
>
> +
> +#define OFFSET(x) offsetof(SBCEncContext, x)
> +#define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
> +static const AVOption options[] = {
> +    { "joint_stereo", "use joint stereo",
> +      OFFSET(joint_stereo), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
>
> +    { "dual_channel", "use dual channel",
> +      OFFSET(dual_channel), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
>

Erm, those two things should be decided by the encoder rather than exposed
to the user. The encoder should decide which mode has lower distortion for
a given signal.



> +    { "subbands",     "number of subbands (4 or 8)",
> +      OFFSET(subbands),     AV_OPT_TYPE_INT,  { .i64 =  8 }, 4,   8, AE },
>

The encoder doesn't check that the value is actually 4 or 8, so 5, 6 and 7
are all accepted. The same issue as with the previous options applies here too.



> +    { "bitpool",      "bitpool value",
> +      OFFSET(bitpool),      AV_OPT_TYPE_INT,  { .i64 = 32 }, 0, 255, AE },
>

This should be controlled by the bitrate setting. Either have a function to
translate the bitrate to a bitpool value, or a table which approximately maps
the supplied bitrate values to bitpools. In addition to the bitrate mapping,
you could expose the bitpool directly through the global_quality setting, so
it doesn't need to be a custom encoder option.



> +    { "blocks",       "number of blocks (4, 8, 12 or 16)",
> +      OFFSET(blocks),       AV_OPT_TYPE_INT,  { .i64 = 16 }, 4,  16, AE },
> +    { "snr",          "use SNR mode (instead of loudness)",
> +      OFFSET(allocation),   AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
>

SNR mode too needs to be decided by the encoder rather than exposing it as
a setting.



> +    { "msbc",         "use mSBC mode (wideband speech mono SBC)",
>

Add a profile fallback setting for this as well, like in aac where -aac_ltp
turns LTP mode on and -profile:a aac_ltp does the same.


You don't have to make the encoder decide which stereo coupling mode or
snr/loudness setting to use, you can implement that with a later patch.
I think you should remove the "blocks" and "subbands" settings as well and
instead replace those with a single "latency" setting like the native Opus
encoder in milliseconds which would adjust both of them on init to set the
frame size. This would also allow the encoder to change them. Again, you
don't have to do this now, you can send a patch which adds a "latency"
option later.
So in total, only 2 options would be needed, "msbc" as an additional way to
use msbc and "latency", which can be added later. For now you should set
all unexposed options to do something safe by default.

Apart from that, I tested the encoder, valgrind looks clean, the SIMD is
bitexact and all advertised samplerates are supported.
diff mbox

Patch

diff --git a/doc/general.texi b/doc/general.texi
index 930c1e8bf2..bf62288f64 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -1118,7 +1118,7 @@  following image formats are supported:
     @tab Real low bitrate AC-3 codec
 @item RealAudio Lossless     @tab     @tab  X
 @item RealAudio SIPR / ACELP.NET @tab     @tab  X
-@item SBC (low-complexity subband codec) @tab     @tab  X
+@item SBC (low-complexity subband codec) @tab  X  @tab  X
     @tab Used in Bluetooth A2DP
 @item Shorten                @tab     @tab  X
 @item Sierra VMD audio       @tab     @tab  X
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 53d199201b..ddae75bb9a 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -584,6 +584,7 @@  OBJS-$(CONFIG_SUNRAST_DECODER)         += sunrast.o
 OBJS-$(CONFIG_SUNRAST_ENCODER)         += sunrastenc.o
 OBJS-$(CONFIG_LIBRSVG_DECODER)         += librsvgdec.o
 OBJS-$(CONFIG_SBC_DECODER)             += sbcdec.o sbcdec_data.o sbc.o
+OBJS-$(CONFIG_SBC_ENCODER)             += sbcenc.o sbc.o sbcdsp.o sbcdsp_data.o
 OBJS-$(CONFIG_SVQ1_DECODER)            += svq1dec.o svq1.o svq13.o h263data.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o  h263data.o  \
                                           h263.o ituh263enc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 7d9097dcf7..de3bea8bc0 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -453,6 +453,7 @@  extern AVCodec ff_ra_144_encoder;
 extern AVCodec ff_ra_144_decoder;
 extern AVCodec ff_ra_288_decoder;
 extern AVCodec ff_ralf_decoder;
+extern AVCodec ff_sbc_encoder;
 extern AVCodec ff_sbc_decoder;
 extern AVCodec ff_shorten_decoder;
 extern AVCodec ff_sipr_decoder;
diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
new file mode 100644
index 0000000000..e155387f0d
--- /dev/null
+++ b/libavcodec/sbcdsp.c
@@ -0,0 +1,382 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/intmath.h"
+#include "libavutil/intreadwrite.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+#include "sbcdsp_data.h"
+
+/*
+ * Reference C code for the analysis filter, with SIMD-friendly table
+ * reordering and code layout. This code can be used to develop platform
+ * specific SIMD optimizations. It may also serve as a test of compiler
+ * autovectorization capabilities (who knows, if the compiler is very
+ * good at this stuff, hand-optimized assembly may not be strictly
+ * needed on some platforms).
+ *
+ * Note: It is also possible to make a simple variant of analysis filter,
+ * which needs only a single constants table without taking care about
+ * even/odd cases. This simple variant of filter can be implemented without
+ * input data permutation. The only thing that would be lost is the
+ * possibility to use pairwise SIMD multiplications. But for some simple
+ * CPU cores without SIMD extensions it can be useful. If anybody is
+ * interested in implementing such variant of a filter, sourcecode from
+ * bluez versions 4.26/4.27 can be used as a reference and the history of
+ * the changes in git repository done around that time may be worth checking.
+ */
+
+static av_always_inline void sbc_analyze_simd(const int16_t *in, int32_t *out,
+                                              const int16_t *consts,
+                                              unsigned subbands)
+{ /* Reference C analysis filter: reads 10*subbands samples from in[], writes `subbands` values to out[] */
+    int32_t t1[8]; /* 32-bit accumulators, one per subband (room for 8) */
+    int16_t t2[8]; /* first-stage results after scaling back down */
+    int i, j, hop = 0;
+
+    /* rounding coefficient: half of the divisor applied by the scaling shift below */
+    for (i = 0; i < subbands; i++)
+        t1[i] = 1 << (SBC_PROTO_FIXED_SCALE - 1);
+
+    /* low pass polyphase filter */
+    for (hop = 0; hop < 10*subbands; hop += 2*subbands)
+        for (i = 0; i < 2*subbands; i++)
+            t1[i >> 1] += in[hop + i] * consts[hop + i]; /* each accumulator sums two adjacent products per hop */
+
+    /* scaling */
+    for (i = 0; i < subbands; i++)
+        t2[i] = t1[i] >> SBC_PROTO_FIXED_SCALE;
+
+    memset(t1, 0, sizeof(t1)); /* reuse t1 as the accumulators of the second stage */
+
+    /* do the cos transform */
+    for (i = 0; i < subbands/2; i++)
+        for (j = 0; j < 2*subbands; j++)
+            t1[j>>1] += t2[i * 2 + (j&1)] * consts[10*subbands + i*2*subbands + j]; /* cos coefficients follow the 10*subbands proto coefficients */
+
+    for (i = 0; i < subbands; i++)
+        out[i] = t1[i] >> (SBC_COS_TABLE_FIXED_SCALE - SCALE_OUT_BITS);
+}
+
+static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{ /* 4-subband instantiation of sbc_analyze_simd(); installed as s->sbc_analyze_4 by ff_sbcdsp_init() */
+    sbc_analyze_simd(in, out, consts, 4);
+}
+
+static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{ /* 8-subband instantiation of sbc_analyze_simd(); installed as s->sbc_analyze_8 by ff_sbcdsp_init() */
+    sbc_analyze_simd(in, out, consts, 8);
+}
+
+static inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{ /* Runs s->sbc_analyze_4 on four blocks, from x + 12 down to x, alternating the odd and even tables */
+    /* Analyze blocks */
+    s->sbc_analyze_4(x + 12, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
+    out += out_stride; /* each block's output starts out_stride int32_t elements after the previous one */
+    s->sbc_analyze_4(x + 8, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
+    out += out_stride;
+    s->sbc_analyze_4(x + 4, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_4(x + 0, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
+}
+
+static inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{ /* Runs s->sbc_analyze_8 on four blocks, from x + 24 down to x, alternating the odd and even tables */
+    /* Analyze blocks */
+    s->sbc_analyze_8(x + 24, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
+    out += out_stride; /* each block's output starts out_stride int32_t elements after the previous one */
+    s->sbc_analyze_8(x + 16, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
+    out += out_stride;
+    s->sbc_analyze_8(x + 8, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_8(x + 0, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
+}
+
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride); /* forward declaration: the odd and even variants refer to each other */
+
+static inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
+                                              int16_t *x, int32_t *out,
+                                              int out_stride)
+{ /* Analyzes a single 8-subband block with the "odd" table; out_stride is not used */
+    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
+    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_even; /* the next call through s->sbc_analyze_8s will use the "even" table */
+}
+
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride)
+{ /* Analyzes a single 8-subband block with the "even" table; out_stride is not used */
+    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
+    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd; /* the next call through s->sbc_analyze_8s will use the "odd" table */
+}
+
+/*
+ * Input data processing functions. The data is endian converted if needed,
+ * channels are deinterleaved and audio samples are reordered for use in
+ * SIMD-friendly analysis filter function. The results are put into "X"
+ * array, getting appended to the previous data (or it is better to say
+ * prepended, as the buffer is filled from top to bottom). Old data is
+ * discarded when needed, but availability of (10 * nrof_subbands)
+ * contiguous samples is always guaranteed for the input to the analysis
+ * filter. This is achieved by copying a sufficient part of old data
+ * to the top of the buffer on buffer wraparound.
+ */
+
+static int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{ /* 4-subband variant: stores reordered samples into X below `position`, 8 per channel at a time, and returns the new position */
+    int c; /* channel index */
+
+    /* handle X buffer wraparound */
+    if (position < nsamples) { /* fewer than nsamples slots remain below position */
+        for (c = 0; c < nchannels; c++)
+            memcpy(&X[c][SBC_X_BUFFER_SIZE - 40], &X[c][position],
+                            36 * sizeof(int16_t)); /* carry the 36 samples starting at position over to the end of the buffer */
+        position = SBC_X_BUFFER_SIZE - 40;
+    }
+
+    /* copy/permutate audio samples */
+    for (; nsamples >= 8; nsamples -= 8, pcm += 16 * nchannels) { /* pcm is a byte pointer: 8 samples of 2 bytes per channel per pass */
+        position -= 8;
+        for (c = 0; c < nchannels; c++) {
+            int16_t *x = &X[c][position];
+            x[0] = AV_RN16(pcm + 14*nchannels + 2*c); /* byte offset 2*(k*nchannels + c) is interleaved sample k of channel c; here k = 7 */
+            x[1] = AV_RN16(pcm +  6*nchannels + 2*c);
+            x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
+            x[3] = AV_RN16(pcm +  8*nchannels + 2*c);
+            x[4] = AV_RN16(pcm +  0*nchannels + 2*c);
+            x[5] = AV_RN16(pcm +  4*nchannels + 2*c);
+            x[6] = AV_RN16(pcm +  2*nchannels + 2*c);
+            x[7] = AV_RN16(pcm + 10*nchannels + 2*c);
+        }
+    }
+
+    return position;
+}
+
+static int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{ /* 8-subband variant: stores reordered samples into X below `position` and returns the new position */
+    int c; /* channel index */
+
+    /* handle X buffer wraparound */
+    if (position < nsamples) { /* fewer than nsamples slots remain below position */
+        for (c = 0; c < nchannels; c++)
+            memcpy(&X[c][SBC_X_BUFFER_SIZE - 72], &X[c][position],
+                            72 * sizeof(int16_t)); /* carry the 72 samples starting at position over to the end of the buffer */
+        position = SBC_X_BUFFER_SIZE - 72;
+    }
+
+    if (position % 16 == 8) { /* position sits half-way into a 16-sample group: consume 8 samples first */
+        position -= 8;
+        nsamples -= 8;
+        for (c = 0; c < nchannels; c++) {
+            int16_t *x = &X[c][position];
+            x[0] = AV_RN16(pcm + 14*nchannels + 2*c); /* note: x[1] is not written in this branch, x[8] is */
+            x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
+            x[3] = AV_RN16(pcm +  0*nchannels + 2*c);
+            x[4] = AV_RN16(pcm + 10*nchannels + 2*c);
+            x[5] = AV_RN16(pcm +  2*nchannels + 2*c);
+            x[6] = AV_RN16(pcm +  8*nchannels + 2*c);
+            x[7] = AV_RN16(pcm +  4*nchannels + 2*c);
+            x[8] = AV_RN16(pcm +  6*nchannels + 2*c);
+        }
+        pcm += 16 * nchannels; /* skip the 8 samples per channel just consumed (byte pointer) */
+    }
+
+    /* copy/permutate audio samples */
+    for (; nsamples >= 16; nsamples -= 16, pcm += 32 * nchannels) { /* 16 samples of 2 bytes per channel per pass */
+        position -= 16;
+        for (c = 0; c < nchannels; c++) {
+            int16_t *x = &X[c][position];
+            x[0]  = AV_RN16(pcm + 30*nchannels + 2*c); /* byte offset 2*(k*nchannels + c) is interleaved sample k of channel c; here k = 15 */
+            x[1]  = AV_RN16(pcm + 14*nchannels + 2*c);
+            x[2]  = AV_RN16(pcm + 28*nchannels + 2*c);
+            x[3]  = AV_RN16(pcm + 16*nchannels + 2*c);
+            x[4]  = AV_RN16(pcm + 26*nchannels + 2*c);
+            x[5]  = AV_RN16(pcm + 18*nchannels + 2*c);
+            x[6]  = AV_RN16(pcm + 24*nchannels + 2*c);
+            x[7]  = AV_RN16(pcm + 20*nchannels + 2*c);
+            x[8]  = AV_RN16(pcm + 22*nchannels + 2*c);
+            x[9]  = AV_RN16(pcm +  6*nchannels + 2*c);
+            x[10] = AV_RN16(pcm + 12*nchannels + 2*c);
+            x[11] = AV_RN16(pcm +  0*nchannels + 2*c);
+            x[12] = AV_RN16(pcm + 10*nchannels + 2*c);
+            x[13] = AV_RN16(pcm +  2*nchannels + 2*c);
+            x[14] = AV_RN16(pcm +  8*nchannels + 2*c);
+            x[15] = AV_RN16(pcm +  4*nchannels + 2*c);
+        }
+    }
+
+    if (nsamples == 8) { /* exactly 8 samples left over: store half a group */
+        position -= 8;
+        for (c = 0; c < nchannels; c++) {
+            int16_t *x = &X[c][position];
+            x[-7] = AV_RN16(pcm + 14*nchannels + 2*c); /* lands below the new position, x[0] is not written; NOTE(review): appears to be the slot the position % 16 == 8 branch skips as x[1] on a later call - confirm */
+            x[1]  = AV_RN16(pcm +  6*nchannels + 2*c);
+            x[2]  = AV_RN16(pcm + 12*nchannels + 2*c);
+            x[3]  = AV_RN16(pcm +  0*nchannels + 2*c);
+            x[4]  = AV_RN16(pcm + 10*nchannels + 2*c);
+            x[5]  = AV_RN16(pcm +  2*nchannels + 2*c);
+            x[6]  = AV_RN16(pcm +  8*nchannels + 2*c);
+            x[7]  = AV_RN16(pcm +  4*nchannels + 2*c);
+        }
+    }
+
+    return position;
+}
+
+static void sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands)
+{ /* Fills scale_factor[ch][sb] from the magnitudes of sb_sample_f[blk][ch][sb] over the first `blocks` blocks */
+    int ch, sb, blk;
+    for (ch = 0; ch < channels; ch++) {
+        for (sb = 0; sb < subbands; sb++) {
+            uint32_t x = 1 << SCALE_OUT_BITS; /* seed bit: x is never zero when handed to ff_clz() */
+            for (blk = 0; blk < blocks; blk++) {
+                int32_t tmp = FFABS(sb_sample_f[blk][ch][sb]);
+                if (tmp != 0)
+                    x |= tmp - 1; /* accumulate the bits of |sample| - 1 over all blocks */
+            }
+            scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x); /* presumably ff_clz() counts leading zero bits, so this is how far x's top bit lies above bit SCALE_OUT_BITS - confirm */
+        }
+    }
+}
+
+static int sbc_calc_scalefactors_j(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands)
+{ /* Two-channel variant: per subband, may replace the channel 0/1 samples in sb_sample_f by half-sum/half-difference; returns the joint bitmask */
+    int blk, joint = 0;
+    int32_t tmp0, tmp1;
+    uint32_t x, y; /* magnitude bit accumulators for channel 0 and channel 1 */
+
+    /* last subband does not use joint stereo */
+    int sb = subbands - 1;
+    x = 1 << SCALE_OUT_BITS; /* seed bit: never pass zero to ff_clz() */
+    y = 1 << SCALE_OUT_BITS;
+    for (blk = 0; blk < blocks; blk++) {
+        tmp0 = FFABS(sb_sample_f[blk][0][sb]);
+        tmp1 = FFABS(sb_sample_f[blk][1][sb]);
+        if (tmp0 != 0)
+            x |= tmp0 - 1;
+        if (tmp1 != 0)
+            y |= tmp1 - 1;
+    }
+    scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x);
+    scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - ff_clz(y);
+
+    /* the rest of subbands can use joint stereo */
+    while (--sb >= 0) { /* walks from subband subbands - 2 down to 0 */
+        int32_t sb_sample_j[16][2]; /* candidate joint samples for this subband: [blk][0] = half-sum, [blk][1] = half-difference */
+        x = 1 << SCALE_OUT_BITS;
+        y = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            tmp0 = sb_sample_f[blk][0][sb];
+            tmp1 = sb_sample_f[blk][1][sb];
+            sb_sample_j[blk][0] = (tmp0 >> 1) + (tmp1 >> 1);
+            sb_sample_j[blk][1] = (tmp0 >> 1) - (tmp1 >> 1);
+            tmp0 = FFABS(tmp0);
+            tmp1 = FFABS(tmp1);
+            if (tmp0 != 0)
+                x |= tmp0 - 1;
+            if (tmp1 != 0)
+                y |= tmp1 - 1;
+        }
+        scale_factor[0][sb] = (31 - SCALE_OUT_BITS) -
+            ff_clz(x); /* scale factors of the unmodified channels */
+        scale_factor[1][sb] = (31 - SCALE_OUT_BITS) -
+            ff_clz(y);
+        x = 1 << SCALE_OUT_BITS; /* second pass: same computation on the joint candidates */
+        y = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            tmp0 = FFABS(sb_sample_j[blk][0]);
+            tmp1 = FFABS(sb_sample_j[blk][1]);
+            if (tmp0 != 0)
+                x |= tmp0 - 1;
+            if (tmp1 != 0)
+                y |= tmp1 - 1;
+        }
+        x = (31 - SCALE_OUT_BITS) - ff_clz(x); /* x and y now hold the joint scale factors */
+        y = (31 - SCALE_OUT_BITS) - ff_clz(y);
+
+        /* decide whether to use joint stereo for this subband */
+        if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) { /* joint is chosen only when its scale factor sum is strictly smaller */
+            joint |= 1 << (subbands - 1 - sb); /* subband sb is flagged at bit (subbands - 1 - sb) */
+            scale_factor[0][sb] = x;
+            scale_factor[1][sb] = y;
+            for (blk = 0; blk < blocks; blk++) {
+                sb_sample_f[blk][0][sb] = sb_sample_j[blk][0]; /* overwrite the caller's samples in place */
+                sb_sample_f[blk][1][sb] = sb_sample_j[blk][1];
+            }
+        }
+    }
+
+    /* bitmask with the information about subbands using joint stereo */
+    return joint;
+}
+
+/*
+ * Set up function pointers; this version installs only the C defaults
+ */
+av_cold void ff_sbcdsp_init(SBCDSPContext *s)
+{ /* s->increment is read below, so the caller must have set it before this call */
+    /* Default implementation for analyze functions */
+    s->sbc_analyze_4 = sbc_analyze_4_simd;
+    s->sbc_analyze_8 = sbc_analyze_8_simd;
+    s->sbc_analyze_4s = sbc_analyze_4b_4s_simd;
+    if (s->increment == 1)
+        s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd; /* single-block variant; it re-points s->sbc_analyze_8s on every call */
+    else
+        s->sbc_analyze_8s = sbc_analyze_4b_8s_simd; /* four blocks per call */
+
+    /* Default implementation for input reordering / deinterleaving */
+    s->sbc_enc_process_input_4s = sbc_enc_process_input_4s;
+    s->sbc_enc_process_input_8s = sbc_enc_process_input_8s;
+
+    /* Default implementation for scale factors calculation */
+    s->sbc_calc_scalefactors = sbc_calc_scalefactors;
+    s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
+}
diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
new file mode 100644
index 0000000000..66ed7d324e
--- /dev/null
+++ b/libavcodec/sbcdsp.h
@@ -0,0 +1,83 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#ifndef AVCODEC_SBCDSP_H
+#define AVCODEC_SBCDSP_H
+
+#include "sbc.h"
+#include "sbcdsp_data.h"
+
+#define SCALE_OUT_BITS 15 /* used in the final shift of the analysis filter and as the seed bit of the scale factor search */
+#define SBC_X_BUFFER_SIZE 328 /* number of int16_t entries per channel in the X sample buffer */
+
+typedef struct sbc_dsp_context SBCDSPContext; /* declared first so the callbacks below can take SBCDSPContext * */
+
+struct sbc_dsp_context {
+    int position; /* NOTE(review): presumably the index into X that is passed to and returned by sbc_enc_process_input_* - confirm in the encoder */
+    /* Number of consecutive blocks handled by the encoder */
+    uint8_t increment; /* read by ff_sbcdsp_init(): 1 selects the single-block 8-subband analysis */
+    DECLARE_ALIGNED(SBC_ALIGN, int16_t, X)[2][SBC_X_BUFFER_SIZE]; /* reordered input samples, one row per channel (two rows) */
+    void (*sbc_analyze_4)(const int16_t *in, int32_t *out, const int16_t *consts); /* single-block 4-subband kernel, given a constants table */
+    void (*sbc_analyze_8)(const int16_t *in, int32_t *out, const int16_t *consts); /* single-block 8-subband kernel, given a constants table */
+    /* Polyphase analysis filter for 4 subbands configuration,
+     * it handles "increment" blocks at once */
+    void (*sbc_analyze_4s)(SBCDSPContext *s,
+                           int16_t *x, int32_t *out, int out_stride);
+    /* Polyphase analysis filter for 8 subbands configuration,
+     * it handles "increment" blocks at once */
+    void (*sbc_analyze_8s)(SBCDSPContext *s,
+                           int16_t *x, int32_t *out, int out_stride);
+    /* Process input data (deinterleave, endian conversion, reordering),
+     * depending on the number of subbands and input data byte order */
+    int (*sbc_enc_process_input_4s)(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels); /* returns the updated position */
+    int (*sbc_enc_process_input_8s)(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels); /* returns the updated position */
+    /* Scale factors calculation */
+    void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands);
+    /* Scale factors calculation with joint stereo support */
+    int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands); /* returns the bitmask of subbands using joint stereo */
+};
+
+/*
+ * Initialize pointers to the functions which are the basic "building bricks"
+ * of the SBC codec. Best implementation is selected based on target CPU
+ * capabilities. s->increment must be set before the call (it is read here).
+ */
+void ff_sbcdsp_init(SBCDSPContext *s);
+
+#endif /* AVCODEC_SBCDSP_H */
diff --git a/libavcodec/sbcdsp_data.c b/libavcodec/sbcdsp_data.c
new file mode 100644
index 0000000000..78c07c0077
--- /dev/null
+++ b/libavcodec/sbcdsp_data.c
@@ -0,0 +1,329 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#include "sbcdsp_data.h"
+
+#define F_PROTO(x) ((int32_t) (((x) * 2) * ((int32_t) 1 << 15) + 0.5)) /* 2*x scaled by 2^15; + 0.5 rounds non-negative values before the cast truncates */
+#define F_COS(x)   ((int32_t) (((x)    ) * ((int32_t) 1 << 15) + 0.5)) /* x scaled by 2^15, same rounding; the tables put the sign outside the macro */
+
+/*
+ * Constant tables for the use in SIMD optimized analysis filters
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_even)[40 + 16] = { /* 40 proto entries followed by 16 cos entries */
+#define C0 1.0932568993 /* C0..C3: proto entries are multiplied by Cn, the matching cos entries divided by it */
+#define C1 1.3056875580
+#define C2 1.3056875580
+#define C3 1.6772280856
+
+#define F(x) F_PROTO(x) /* part 1: reordered "proto" table */
+     F(0.00000000E+00 * C0),  F(3.83720193E-03 * C0),
+     F(5.36548976E-04 * C1),  F(2.73370904E-03 * C1),
+     F(3.06012286E-03 * C2),  F(3.89205149E-03 * C2),
+     F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3),
+     F(1.09137620E-02 * C0),  F(2.58767811E-02 * C0),
+     F(2.04385087E-02 * C1),  F(3.21939290E-02 * C1),
+     F(7.76463494E-02 * C2),  F(6.13245186E-03 * C2),
+     F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3),
+     F(1.35593274E-01 * C0),  F(2.94315332E-01 * C0),
+     F(1.94987841E-01 * C1),  F(2.81828203E-01 * C1),
+    -F(1.94987841E-01 * C2),  F(2.81828203E-01 * C2),
+     F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3),
+    -F(1.35593274E-01 * C0),  F(2.58767811E-02 * C0),
+    -F(7.76463494E-02 * C1),  F(6.13245186E-03 * C1),
+    -F(2.04385087E-02 * C2),  F(3.21939290E-02 * C2),
+     F(0.00000000E+00 * C3),  F(2.88217274E-02 * C3),
+    -F(1.09137620E-02 * C0),  F(3.83720193E-03 * C0),
+    -F(3.06012286E-03 * C1),  F(3.89205149E-03 * C1),
+    -F(5.36548976E-04 * C2),  F(2.73370904E-03 * C2),
+     F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3),
+#undef F
+#define F(x) F_COS(x) /* part 2: reordered "cos" table */
+     F(0.7071067812 / C0),  F(0.9238795325 / C1),
+    -F(0.7071067812 / C0),  F(0.3826834324 / C1),
+    -F(0.7071067812 / C0), -F(0.3826834324 / C1),
+     F(0.7071067812 / C0), -F(0.9238795325 / C1),
+     F(0.3826834324 / C2), -F(1.0000000000 / C3),
+    -F(0.9238795325 / C2), -F(1.0000000000 / C3),
+     F(0.9238795325 / C2), -F(1.0000000000 / C3),
+    -F(0.3826834324 / C2), -F(1.0000000000 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_odd)[40 + 16] = { /* 40 proto entries followed by 16 cos entries */
+#define C0 1.3056875580 /* C0..C3: proto entries are multiplied by Cn, the matching cos entries divided by it */
+#define C1 1.6772280856
+#define C2 1.0932568993
+#define C3 1.3056875580
+
+#define F(x) F_PROTO(x) /* part 1: reordered "proto" table */
+     F(2.73370904E-03 * C0),  F(5.36548976E-04 * C0),
+    -F(1.49188357E-03 * C1),  F(0.00000000E+00 * C1),
+     F(3.83720193E-03 * C2),  F(1.09137620E-02 * C2),
+     F(3.89205149E-03 * C3),  F(3.06012286E-03 * C3),
+     F(3.21939290E-02 * C0),  F(2.04385087E-02 * C0),
+    -F(2.88757392E-02 * C1),  F(0.00000000E+00 * C1),
+     F(2.58767811E-02 * C2),  F(1.35593274E-01 * C2),
+     F(6.13245186E-03 * C3),  F(7.76463494E-02 * C3),
+     F(2.81828203E-01 * C0),  F(1.94987841E-01 * C0),
+    -F(2.46636662E-01 * C1),  F(0.00000000E+00 * C1),
+     F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2),
+     F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3),
+     F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0),
+     F(2.88217274E-02 * C1),  F(0.00000000E+00 * C1),
+     F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2),
+     F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3),
+     F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0),
+    -F(1.86581691E-03 * C1),  F(0.00000000E+00 * C1),
+     F(3.83720193E-03 * C2),  F(0.00000000E+00 * C2),
+     F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3),
+#undef F
+#define F(x) F_COS(x) /* part 2: reordered "cos" table */
+     F(0.9238795325 / C0), -F(1.0000000000 / C1),
+     F(0.3826834324 / C0), -F(1.0000000000 / C1),
+    -F(0.3826834324 / C0), -F(1.0000000000 / C1),
+    -F(0.9238795325 / C0), -F(1.0000000000 / C1),
+     F(0.7071067812 / C2),  F(0.3826834324 / C3),
+    -F(0.7071067812 / C2), -F(0.9238795325 / C3),
+    -F(0.7071067812 / C2),  F(0.9238795325 / C3),
+     F(0.7071067812 / C2), -F(0.3826834324 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+/*
+ * "Even" constant table for the fixed8 SIMD analysis filter.
+ *
+ * Layout: 80 entries built with F_PROTO() (the reordered "proto" part)
+ * followed by 64 entries built with F_COS() (the reordered "cos" part).
+ *
+ * C0..C7 are scale factors: every proto entry is multiplied by one of them
+ * and the cos entries using the same factor are divided by it. The factors
+ * are the same as in the "odd" table except that C0 and C4 are swapped.
+ * NOTE(review): presumably the scaling cancels out in the proto * cos
+ * products and only serves to make better use of the int16_t range --
+ * confirm against the analysis filter code.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_even)[80 + 64] = {
+#define C0 2.7906148894
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.5377944043
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+/* part 1: 80 "proto" coefficients, each scaled up by its Cn */
+#define F(x) F_PROTO(x)
+     F(0.00000000E+00 * C0),  F(2.01182542E-03 * C0),
+     F(1.56575398E-04 * C1),  F(1.78371725E-03 * C1),
+     F(3.43256425E-04 * C2),  F(1.47640169E-03 * C2),
+     F(5.54620202E-04 * C3),  F(1.13992507E-03 * C3),
+    -F(8.23919506E-04 * C4),  F(0.00000000E+00 * C4),
+     F(2.10371989E-03 * C5),  F(3.49717454E-03 * C5),
+     F(1.99454554E-03 * C6),  F(1.64973098E-03 * C6),
+     F(1.61656283E-03 * C7),  F(1.78805361E-04 * C7),
+     F(5.65949473E-03 * C0),  F(1.29371806E-02 * C0),
+     F(8.02941163E-03 * C1),  F(1.53184106E-02 * C1),
+     F(1.04584443E-02 * C2),  F(1.62208471E-02 * C2),
+     F(1.27472335E-02 * C3),  F(1.59045603E-02 * C3),
+    -F(1.46525263E-02 * C4),  F(0.00000000E+00 * C4),
+     F(8.85757540E-03 * C5),  F(5.31873032E-02 * C5),
+     F(2.92408442E-03 * C6),  F(3.90751381E-02 * C6),
+    -F(4.91578024E-03 * C7),  F(2.61098752E-02 * C7),
+     F(6.79989431E-02 * C0),  F(1.46955068E-01 * C0),
+     F(8.29847578E-02 * C1),  F(1.45389847E-01 * C1),
+     F(9.75753918E-02 * C2),  F(1.40753505E-01 * C2),
+     F(1.11196689E-01 * C3),  F(1.33264415E-01 * C3),
+    -F(1.23264548E-01 * C4),  F(0.00000000E+00 * C4),
+     F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+     F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+     F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+    -F(6.79989431E-02 * C0),  F(1.29371806E-02 * C0),
+    -F(5.31873032E-02 * C1),  F(8.85757540E-03 * C1),
+    -F(3.90751381E-02 * C2),  F(2.92408442E-03 * C2),
+    -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+     F(1.46404076E-02 * C4),  F(0.00000000E+00 * C4),
+     F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+     F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+     F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+    -F(5.65949473E-03 * C0),  F(2.01182542E-03 * C0),
+    -F(3.49717454E-03 * C1),  F(2.10371989E-03 * C1),
+    -F(1.64973098E-03 * C2),  F(1.99454554E-03 * C2),
+    -F(1.78805361E-04 * C3),  F(1.61656283E-03 * C3),
+    -F(9.02154502E-04 * C4),  F(0.00000000E+00 * C4),
+     F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+     F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+     F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+/* part 2: 64 "cos" coefficients, each divided by the matching Cn */
+#define F(x) F_COS(x)
+     F(0.7071067812 / C0),  F(0.8314696123 / C1),
+    -F(0.7071067812 / C0), -F(0.1950903220 / C1),
+    -F(0.7071067812 / C0), -F(0.9807852804 / C1),
+     F(0.7071067812 / C0), -F(0.5555702330 / C1),
+     F(0.7071067812 / C0),  F(0.5555702330 / C1),
+    -F(0.7071067812 / C0),  F(0.9807852804 / C1),
+    -F(0.7071067812 / C0),  F(0.1950903220 / C1),
+     F(0.7071067812 / C0), -F(0.8314696123 / C1),
+     F(0.9238795325 / C2),  F(0.9807852804 / C3),
+     F(0.3826834324 / C2),  F(0.8314696123 / C3),
+    -F(0.3826834324 / C2),  F(0.5555702330 / C3),
+    -F(0.9238795325 / C2),  F(0.1950903220 / C3),
+    -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+    -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+     F(0.3826834324 / C2), -F(0.8314696123 / C3),
+     F(0.9238795325 / C2), -F(0.9807852804 / C3),
+    -F(1.0000000000 / C4),  F(0.5555702330 / C5),
+    -F(1.0000000000 / C4), -F(0.9807852804 / C5),
+    -F(1.0000000000 / C4),  F(0.1950903220 / C5),
+    -F(1.0000000000 / C4),  F(0.8314696123 / C5),
+    -F(1.0000000000 / C4), -F(0.8314696123 / C5),
+    -F(1.0000000000 / C4), -F(0.1950903220 / C5),
+    -F(1.0000000000 / C4),  F(0.9807852804 / C5),
+    -F(1.0000000000 / C4), -F(0.5555702330 / C5),
+     F(0.3826834324 / C6),  F(0.1950903220 / C7),
+    -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+     F(0.9238795325 / C6),  F(0.8314696123 / C7),
+    -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+    -F(0.3826834324 / C6),  F(0.9807852804 / C7),
+     F(0.9238795325 / C6), -F(0.8314696123 / C7),
+    -F(0.9238795325 / C6),  F(0.5555702330 / C7),
+     F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
+
+/*
+ * "Odd" constant table for the fixed8 SIMD analysis filter.
+ *
+ * Layout: 80 entries built with F_PROTO() (the reordered "proto" part)
+ * followed by 64 entries built with F_COS() (the reordered "cos" part).
+ *
+ * C0..C7 are scale factors: every proto entry is multiplied by one of them
+ * and the cos entries using the same factor are divided by it. The factors
+ * are the same as in the "even" table except that C0 and C4 are swapped.
+ * NOTE(review): presumably the scaling cancels out in the proto * cos
+ * products and only serves to make better use of the int16_t range --
+ * confirm against the analysis filter code.
+ */
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_odd)[80 + 64] = {
+#define C0 2.5377944043
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.7906148894
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+/* part 1: 80 "proto" coefficients, each scaled up by its Cn */
+#define F(x) F_PROTO(x)
+     F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0),
+     F(1.56575398E-04 * C1),  F(1.78371725E-03 * C1),
+     F(3.43256425E-04 * C2),  F(1.47640169E-03 * C2),
+     F(5.54620202E-04 * C3),  F(1.13992507E-03 * C3),
+     F(2.01182542E-03 * C4),  F(5.65949473E-03 * C4),
+     F(2.10371989E-03 * C5),  F(3.49717454E-03 * C5),
+     F(1.99454554E-03 * C6),  F(1.64973098E-03 * C6),
+     F(1.61656283E-03 * C7),  F(1.78805361E-04 * C7),
+     F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0),
+     F(8.02941163E-03 * C1),  F(1.53184106E-02 * C1),
+     F(1.04584443E-02 * C2),  F(1.62208471E-02 * C2),
+     F(1.27472335E-02 * C3),  F(1.59045603E-02 * C3),
+     F(1.29371806E-02 * C4),  F(6.79989431E-02 * C4),
+     F(8.85757540E-03 * C5),  F(5.31873032E-02 * C5),
+     F(2.92408442E-03 * C6),  F(3.90751381E-02 * C6),
+    -F(4.91578024E-03 * C7),  F(2.61098752E-02 * C7),
+     F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0),
+     F(8.29847578E-02 * C1),  F(1.45389847E-01 * C1),
+     F(9.75753918E-02 * C2),  F(1.40753505E-01 * C2),
+     F(1.11196689E-01 * C3),  F(1.33264415E-01 * C3),
+     F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4),
+     F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+     F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+     F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+     F(0.00000000E+00 * C0),  F(1.46404076E-02 * C0),
+    -F(5.31873032E-02 * C1),  F(8.85757540E-03 * C1),
+    -F(3.90751381E-02 * C2),  F(2.92408442E-03 * C2),
+    -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+     F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4),
+     F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+     F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+     F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+     F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0),
+    -F(3.49717454E-03 * C1),  F(2.10371989E-03 * C1),
+    -F(1.64973098E-03 * C2),  F(1.99454554E-03 * C2),
+    -F(1.78805361E-04 * C3),  F(1.61656283E-03 * C3),
+     F(2.01182542E-03 * C4),  F(0.00000000E+00 * C4),
+     F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+     F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+     F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+/* part 2: 64 "cos" coefficients, each divided by the matching Cn */
+#define F(x) F_COS(x)
+    -F(1.0000000000 / C0),  F(0.8314696123 / C1),
+    -F(1.0000000000 / C0), -F(0.1950903220 / C1),
+    -F(1.0000000000 / C0), -F(0.9807852804 / C1),
+    -F(1.0000000000 / C0), -F(0.5555702330 / C1),
+    -F(1.0000000000 / C0),  F(0.5555702330 / C1),
+    -F(1.0000000000 / C0),  F(0.9807852804 / C1),
+    -F(1.0000000000 / C0),  F(0.1950903220 / C1),
+    -F(1.0000000000 / C0), -F(0.8314696123 / C1),
+     F(0.9238795325 / C2),  F(0.9807852804 / C3),
+     F(0.3826834324 / C2),  F(0.8314696123 / C3),
+    -F(0.3826834324 / C2),  F(0.5555702330 / C3),
+    -F(0.9238795325 / C2),  F(0.1950903220 / C3),
+    -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+    -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+     F(0.3826834324 / C2), -F(0.8314696123 / C3),
+     F(0.9238795325 / C2), -F(0.9807852804 / C3),
+     F(0.7071067812 / C4),  F(0.5555702330 / C5),
+    -F(0.7071067812 / C4), -F(0.9807852804 / C5),
+    -F(0.7071067812 / C4),  F(0.1950903220 / C5),
+     F(0.7071067812 / C4),  F(0.8314696123 / C5),
+     F(0.7071067812 / C4), -F(0.8314696123 / C5),
+    -F(0.7071067812 / C4), -F(0.1950903220 / C5),
+    -F(0.7071067812 / C4),  F(0.9807852804 / C5),
+     F(0.7071067812 / C4), -F(0.5555702330 / C5),
+     F(0.3826834324 / C6),  F(0.1950903220 / C7),
+    -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+     F(0.9238795325 / C6),  F(0.8314696123 / C7),
+    -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+    -F(0.3826834324 / C6),  F(0.9807852804 / C7),
+     F(0.9238795325 / C6), -F(0.8314696123 / C7),
+    -F(0.9238795325 / C6),  F(0.5555702330 / C7),
+     F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
diff --git a/libavcodec/sbcdsp_data.h b/libavcodec/sbcdsp_data.h
new file mode 100644
index 0000000000..10fad5caa5
--- /dev/null
+++ b/libavcodec/sbcdsp_data.h
@@ -0,0 +1,55 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#ifndef AVCODEC_SBCDSP_DATA_H
+#define AVCODEC_SBCDSP_DATA_H
+
+#include "sbc.h"
+
+/*
+ * NOTE(review): presumably the number of fractional bits used when the
+ * "proto" and "cos" coefficients are converted to fixed point -- confirm
+ * against the F_PROTO()/F_COS() macros in sbcdsp_data.c.
+ */
+#define SBC_PROTO_FIXED_SCALE      16
+#define SBC_COS_TABLE_FIXED_SCALE  15
+
+/*
+ * Constant tables for use in the SIMD-optimized analysis filters.
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for the "even"
+ * and "odd" cases are needed.
+ *
+ * The arrays are declared without a size here; the fixed8 tables are
+ * defined in sbcdsp_data.c with 80 + 64 entries each.
+ */
+
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_even[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_odd[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_even[];
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_odd[];
+
+#endif /* AVCODEC_SBCDSP_DATA_H */
diff --git a/libavcodec/sbcenc.c b/libavcodec/sbcenc.c
new file mode 100644
index 0000000000..442e4ed525
--- /dev/null
+++ b/libavcodec/sbcenc.c
@@ -0,0 +1,411 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC encoder implementation
+ */
+
+#include <stdbool.h>
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "put_bits.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+
+/*
+ * Private state of the SBC encoder (AVCodecContext.priv_data).
+ *
+ * Several fields are first filled in by the AVOption system with the
+ * user-facing values and are then rewritten by sbc_encode_init() into the
+ * compact form used by the rest of the encoder; see the per-field notes.
+ */
+typedef struct SBCEncContext {
+    /* NOTE(review): presumably has to stay the first member so that the
+     * AVOption machinery can find the class -- confirm */
+    AVClass *class;
+
+    /* index of avctx->sample_rate in the codec's supported_samplerates list */
+    uint8_t frequency;
+    /* "blocks" option (4..16); sbc_encode_init() replaces it with
+     * (value >> 2) - 1, or with MSBC_BLOCKS in mSBC mode */
+    int blocks;
+    /* "subbands" option (4..8); sbc_encode_init() replaces it with
+     * value >> 3 (SBC_SB_8 in mSBC mode); afterwards non-zero means
+     * 8 subbands and zero means 4 */
+    int subbands;
+    /* one of the SBC_MODE_* values, chosen in sbc_encode_init() */
+    uint8_t mode;
+    /* "snr" option; forced to SBC_AM_LOUDNESS in mSBC mode */
+    int allocation;
+    /* "bitpool" option; forced to 26 in mSBC mode */
+    int bitpool;
+
+    /* "joint_stereo" / "dual_channel" options; mutually exclusive */
+    int joint_stereo;
+    int dual_channel;
+
+    /* set once sbc_encode_frame() has filled in frame and set up dsp */
+    bool init;
+    /* "msbc" option */
+    int msbc;
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+    DECLARE_ALIGNED(SBC_ALIGN, SBCDSPContext, dsp);
+} SBCEncContext;
+
+/*
+ * Run the analysis filter over the buffered input of every channel and
+ * store the resulting subband samples in frame->sb_sample_f.
+ *
+ * The input pointer starts at position - subbands * increment +
+ * blocks * subbands inside s->X[ch] and moves back by
+ * subbands * increment samples after each filter call; the block index
+ * advances by s->increment per call.
+ *
+ * Returns blocks * subbands, or AVERROR(EIO) if frame->subbands is
+ * neither 4 nor 8.
+ */
+static int sbc_analyze_audio(SBCDSPContext *s, struct sbc_frame *frame)
+{
+    const int nsb = frame->subbands;
+    int16_t *in;
+    int c, b;
+
+    /* only a 4- and an 8-subband filter are available */
+    if (nsb != 4 && nsb != 8)
+        return AVERROR(EIO);
+
+    for (c = 0; c < frame->channels; c++) {
+        in = &s->X[c][s->position - nsb * s->increment + frame->blocks * nsb];
+
+        for (b = 0; b < frame->blocks; b += s->increment) {
+            /* the last argument is the output stride: the distance between
+             * two consecutive blocks of this channel in sb_sample_f */
+            if (nsb == 4)
+                s->sbc_analyze_4s(s, in, frame->sb_sample_f[b][c],
+                                  frame->sb_sample_f[b + 1][c] -
+                                  frame->sb_sample_f[b][c]);
+            else
+                s->sbc_analyze_8s(s, in, frame->sb_sample_f[b][c],
+                                  frame->sb_sample_f[b + 1][c] -
+                                  frame->sb_sample_f[b][c]);
+            in -= nsb * s->increment;
+        }
+    }
+
+    return frame->blocks * nsb;
+}
+
+/*
+ * Packs the SBC frame described by frame into the buffer of avpkt.
+ *
+ * Bytes written:
+ *   0      syncword (MSBC_SYNCWORD or SBC_SYNCWORD)
+ *   1-2    frame header (both zero for mSBC)
+ *   3      CRC-8 over header bytes 1-2, the joint flags and the scale factors
+ *   4...   bit-packed joint flags (joint stereo only), scale factors and
+ *          quantized subband samples
+ *
+ * avpkt  output packet, data must already be allocated with avpkt->size bytes
+ * frame  frame parameters, scale factors and subband samples
+ * joint  per-subband joint stereo flags, written when frame->mode is
+ *        JOINT_STEREO
+ * msbc   write the mSBC header instead of the regular SBC header
+ *
+ * Returns the number of bytes written after the 4 header bytes.
+ * NOTE(review): when the bitpool is too large for the configuration the
+ * function returns -5 converted to size_t (a huge value), after the first
+ * three bytes have already been written; callers have to treat any result
+ * larger than the packet size as a failure.
+ */
+static size_t sbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame,
+                             int joint, bool msbc)
+{
+    PutBitContext pb;
+
+    /* Will copy the header parts for CRC-8 calculation here */
+    uint8_t crc_header[11] = { 0 };
+    int crc_pos;
+
+    uint32_t audio_sample;
+
+    int ch, sb, blk;        /* channel, subband and block counters */
+    int bits[2][8];         /* bits distribution */
+    uint32_t levels[2][8];  /* levels are derived from that */
+    uint32_t sb_sample_delta[2][8];
+
+    if (msbc) {
+        avpkt->data[0] = MSBC_SYNCWORD;
+        avpkt->data[1] = 0;
+        avpkt->data[2] = 0;
+    } else {
+        avpkt->data[0] = SBC_SYNCWORD;
+
+        avpkt->data[1]  = (frame->frequency  & 0x03) << 6;
+        avpkt->data[1] |= (frame->block_mode & 0x03) << 4;
+        avpkt->data[1] |= (frame->mode       & 0x03) << 2;
+        avpkt->data[1] |= (frame->allocation & 0x01) << 1;
+        avpkt->data[1] |= (frame->subbands == 8);
+
+        avpkt->data[2] = frame->bitpool;
+
+        /* the limit is 16 * subbands, doubled for stereo and joint stereo */
+        if (frame->bitpool > frame->subbands << (4 + (frame->mode == STEREO
+                                                   || frame->mode == JOINT_STEREO)))
+            return -5;
+    }
+
+    /* Can't fill in crc yet */
+    crc_header[0] = avpkt->data[1];
+    crc_header[1] = avpkt->data[2];
+    crc_pos = 16;
+
+    /* the bit writer starts after the 4 header bytes, so only size - 4
+     * bytes of the packet are available to it */
+    init_put_bits(&pb, avpkt->data + 4, avpkt->size - 4);
+
+    if (frame->mode == JOINT_STEREO) {
+        put_bits(&pb, frame->subbands, joint);
+        crc_header[crc_pos >> 3] = joint;
+        crc_pos += frame->subbands;
+    }
+
+    /* scale factors go both into the bitstream and into the CRC input,
+     * one nibble each */
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++) {
+            put_bits(&pb, 4, frame->scale_factor[ch][sb] & 0x0F);
+            crc_header[crc_pos >> 3] <<= 4;
+            crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F;
+            crc_pos += 4;
+        }
+    }
+
+    /* align the last crc byte */
+    if (crc_pos % 8)
+        crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8);
+
+    avpkt->data[3] = sbc_crc8(frame->crc_ctx, crc_header, crc_pos);
+
+    ff_sbc_calculate_bits(frame, bits);
+
+    /* precompute the per-subband quantizer scale and offset */
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++) {
+            levels[ch][sb] = ((1 << bits[ch][sb]) - 1) <<
+                (32 - (frame->scale_factor[ch][sb] +
+                    SCALE_OUT_BITS + 2));
+            sb_sample_delta[ch][sb] = (uint32_t) 1 <<
+                (frame->scale_factor[ch][sb] +
+                    SCALE_OUT_BITS + 1);
+        }
+    }
+
+    for (blk = 0; blk < frame->blocks; blk++) {
+        for (ch = 0; ch < frame->channels; ch++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+
+                /* subbands that got no bits are not transmitted */
+                if (bits[ch][sb] == 0)
+                    continue;
+
+                audio_sample = ((uint64_t) levels[ch][sb] *
+                    (sb_sample_delta[ch][sb] +
+                    frame->sb_sample_f[blk][ch][sb])) >> 32;
+
+                put_bits(&pb, bits[ch][sb], audio_sample);
+            }
+        }
+    }
+
+    flush_put_bits(&pb);
+
+    return (put_bits_count(&pb) + 7) / 8;
+}
+
+/*
+ * Reset the DSP state before the first frame: clear the input buffer X,
+ * choose the block increment (1 for mSBC, 4 otherwise), place the write
+ * position near the end of the buffer (rounded down to a multiple of 8)
+ * and finally let ff_sbcdsp_init() finish the setup.
+ */
+static void sbc_encoder_init(bool msbc, SBCDSPContext *s,
+                             const struct sbc_frame *frame)
+{
+    if (msbc)
+        s->increment = 1;
+    else
+        s->increment = 4;
+
+    s->position = (SBC_X_BUFFER_SIZE - 9 * frame->subbands) & ~7;
+    memset(s->X, 0, sizeof(s->X));
+
+    ff_sbcdsp_init(s);
+}
+
+/*
+ * Validate the user options and convert them into the compact form used
+ * by the encoder.
+ *
+ * In mSBC mode the stream parameters are fixed (mono, 16 kHz, 8 subbands,
+ * MSBC_BLOCKS blocks, loudness allocation, bitpool 26). Otherwise the
+ * "subbands" and "blocks" options are checked and rewritten in place as
+ * subbands >> 3 and (blocks >> 2) - 1.
+ *
+ * Sets avctx->frame_size to the number of samples per channel in one frame.
+ * Returns 0 on success or AVERROR(EINVAL) for an invalid option combination.
+ */
+static int sbc_encode_init(AVCodecContext *avctx)
+{
+    SBCEncContext *sbc = avctx->priv_data;
+    int i;
+
+    if (sbc->joint_stereo && sbc->dual_channel) {
+        av_log(avctx, AV_LOG_ERROR, "joint_stereo and dual_channel "
+                                    "can't be used at the same time.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (sbc->msbc) {
+        if (avctx->channels != 1) {
+            av_log(avctx, AV_LOG_ERROR, "mSBC require mono channel.\n");
+            return AVERROR(EINVAL);
+        }
+
+        if (avctx->sample_rate != 16000) {
+            av_log(avctx, AV_LOG_ERROR, "mSBC require 16 kHz samplerate.\n");
+            return AVERROR(EINVAL);
+        }
+
+        sbc->subbands = SBC_SB_8;
+        sbc->blocks = MSBC_BLOCKS;
+        sbc->allocation = SBC_AM_LOUDNESS;
+        sbc->bitpool = 26;
+
+        avctx->frame_size = 8 * MSBC_BLOCKS;
+    } else {
+        /* the option ranges also admit in-between values, which the shifts
+         * below would silently truncate, so reject them explicitly */
+        if (sbc->subbands != 4 && sbc->subbands != 8) {
+            av_log(avctx, AV_LOG_ERROR, "subbands must be 4 or 8.\n");
+            return AVERROR(EINVAL);
+        }
+        if (sbc->blocks < 4 || sbc->blocks > 16 || sbc->blocks % 4) {
+            av_log(avctx, AV_LOG_ERROR, "blocks must be 4, 8, 12 or 16.\n");
+            return AVERROR(EINVAL);
+        }
+
+        sbc->subbands >>= 3;
+        sbc->blocks = (sbc->blocks >> 2) - 1;
+
+        avctx->frame_size = 4*(sbc->subbands + 1) * 4*(sbc->blocks + 1);
+    }
+
+    sbc->mode = SBC_MODE_STEREO;
+    if (sbc->joint_stereo)
+        sbc->mode = SBC_MODE_JOINT_STEREO;
+    else if (sbc->dual_channel)
+        sbc->mode = SBC_MODE_DUAL_CHANNEL;
+    if (avctx->channels == 1)
+        sbc->mode = SBC_MODE_MONO;
+
+    if (!sbc->msbc) {
+        /* same limit the frame packer enforces: 16 * subbands, doubled for
+         * stereo and joint stereo; checking here reports the problem before
+         * any packet is produced */
+        int stereo      = sbc->mode == SBC_MODE_STEREO ||
+                          sbc->mode == SBC_MODE_JOINT_STEREO;
+        int max_bitpool = (sbc->subbands ? 8 : 4) << (4 + stereo);
+
+        if (sbc->bitpool > max_bitpool) {
+            av_log(avctx, AV_LOG_ERROR, "bitpool %d is too large, the maximum "
+                                        "for this configuration is %d.\n",
+                   sbc->bitpool, max_bitpool);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    /* the frame header stores the sample rate as an index into this list */
+    for (i = 0; avctx->codec->supported_samplerates[i]; i++) {
+        if (avctx->sample_rate == avctx->codec->supported_samplerates[i]) {
+            sbc->frequency = i;
+            break;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Returns the size in bytes of one encoded frame for the current settings.
+ *
+ * Once the encoder is initialized and the bitpool has not changed, the
+ * length cached in sbc->frame is returned. Otherwise the length is
+ * 4 header bytes, plus 4 bits of scale factor per subband and channel,
+ * plus the sample bits rounded up to a whole byte.
+ */
+static size_t sbc_get_frame_length(SBCEncContext *sbc)
+{
+    uint8_t nb_subbands, nb_channels, nb_blocks, has_joint, pool;
+    int sample_bits, length;
+
+    if (sbc->init && sbc->frame.bitpool == sbc->bitpool)
+        return sbc->frame.length;
+
+    nb_subbands = sbc->subbands ? 8 : 4;
+    nb_blocks   = sbc->msbc ? MSBC_BLOCKS : 4 + (sbc->blocks * 4);
+    nb_channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
+    has_joint   = sbc->mode == SBC_MODE_JOINT_STEREO ? 1 : 0;
+    pool        = sbc->bitpool;
+
+    /* mono and dual channel spend the bitpool per channel; stereo and joint
+     * stereo share one bitpool, joint stereo adds one flag bit per subband */
+    if (nb_channels == 1 || sbc->mode == SBC_MODE_DUAL_CHANNEL)
+        sample_bits = nb_blocks * nb_channels * pool;
+    else
+        sample_bits = (has_joint ? nb_subbands : 0) + nb_blocks * pool;
+
+    /* the sample bits do not always fill whole bytes, so round them up */
+    length = 4 + (4 * nb_subbands * nb_channels) / 8 + (sample_bits + 7) / 8;
+
+    return length;
+}
+
+/*
+ * Encode one frame of interleaved 16-bit PCM into one SBC frame.
+ *
+ * On the first call the frame parameters are derived from the options and
+ * the DSP state is initialized; on later calls only a changed bitpool is
+ * picked up. The input is then copied into the DSP buffer, analyzed into
+ * subband samples, scale factors are computed and the frame is packed.
+ *
+ * Returns 0 on success (with *got_packet_ptr set to 1), 0 without a packet
+ * if the input holds fewer samples than one frame needs, or a negative
+ * AVERROR code on failure.
+ */
+static int sbc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{
+    SBCEncContext *sbc = avctx->priv_data;
+    int (*sbc_enc_process_input)(int position,
+            const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+            int nsamples, int nchannels);
+    size_t packed;
+    int ret, j = 0;
+
+    if (!sbc)
+        return AVERROR(EIO);
+
+    if (!sbc->init) {
+        sbc->frame.frequency = sbc->frequency;
+        sbc->frame.mode = sbc->mode;
+        sbc->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
+        sbc->frame.allocation = sbc->allocation;
+        sbc->frame.subband_mode = sbc->subbands;
+        sbc->frame.subbands = sbc->subbands ? 8 : 4;
+        sbc->frame.block_mode = sbc->blocks;
+        if (sbc->msbc)
+            sbc->frame.blocks = MSBC_BLOCKS;
+        else
+            sbc->frame.blocks = 4 + (sbc->blocks * 4);
+        sbc->frame.bitpool = sbc->bitpool;
+        /* bytes of 16-bit PCM input consumed per frame */
+        sbc->frame.codesize = sbc->frame.subbands * sbc->frame.blocks
+                              * sbc->frame.channels * 2;
+        sbc->frame.length = sbc_get_frame_length(sbc);
+        sbc->frame.crc_ctx = av_crc_get_table(AV_CRC_8_EBU);
+
+        sbc_encoder_init(sbc->msbc, &sbc->dsp, &sbc->frame);
+        sbc->init = true;
+    } else if (sbc->frame.bitpool != sbc->bitpool) {
+        /* the length has to be recomputed before frame.bitpool is updated,
+         * otherwise sbc_get_frame_length() returns the cached value */
+        sbc->frame.length = sbc_get_frame_length(sbc);
+        sbc->frame.bitpool = sbc->bitpool;
+    }
+
+    /* input must be large enough to encode a complete frame */
+    if (frame->nb_samples * sbc->frame.channels * 2 < sbc->frame.codesize)
+        return 0;
+
+    if ((ret = ff_alloc_packet2(avctx, avpkt, sbc->frame.length, 0)) < 0)
+        return ret;
+
+    /* Select the needed input data processing function and call it */
+    if (sbc->frame.subbands == 8) {
+        sbc_enc_process_input = sbc->dsp.sbc_enc_process_input_8s;
+    } else {
+        sbc_enc_process_input = sbc->dsp.sbc_enc_process_input_4s;
+    }
+
+    sbc->dsp.position = sbc_enc_process_input(
+        sbc->dsp.position, frame->data[0],
+        sbc->dsp.X, sbc->frame.subbands * sbc->frame.blocks,
+        sbc->frame.channels);
+
+    ret = sbc_analyze_audio(&sbc->dsp, &sbc->frame);
+    if (ret < 0) {
+        emms_c();
+        return ret;
+    }
+
+    if (sbc->frame.mode == JOINT_STEREO)
+        j = sbc->dsp.sbc_calc_scalefactors_j(sbc->frame.sb_sample_f,
+                                             sbc->frame.scale_factor,
+                                             sbc->frame.blocks,
+                                             sbc->frame.subbands);
+    else
+        sbc->dsp.sbc_calc_scalefactors(sbc->frame.sb_sample_f,
+                                       sbc->frame.scale_factor,
+                                       sbc->frame.blocks,
+                                       sbc->frame.channels,
+                                       sbc->frame.subbands);
+    emms_c();
+
+    /* sbc_pack_frame() reports failure as a negative value converted to
+     * size_t, i.e. a length far beyond the packet size; never hand out a
+     * packet whose payload was not written */
+    packed = sbc_pack_frame(avpkt, &sbc->frame, j, sbc->msbc);
+    if (packed > avpkt->size) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to pack frame: bitpool %d is too large.\n",
+               sbc->frame.bitpool);
+        return AVERROR(EINVAL);
+    }
+
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+/* byte offset of an option's storage inside SBCEncContext */
+#define OFFSET(x) offsetof(SBCEncContext, x)
+/* flags shared by all options below; parenthesized so the expansion stays
+ * a single operand wherever the macro is used */
+#define AE (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+/*
+ * Private encoder options. The integer ranges for "subbands" and "blocks"
+ * also admit values other than the ones listed in the help text.
+ */
+static const AVOption options[] = {
+    { "joint_stereo", "use joint stereo",
+      OFFSET(joint_stereo), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { "dual_channel", "use dual channel",
+      OFFSET(dual_channel), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { "subbands",     "number of subbands (4 or 8)",
+      OFFSET(subbands),     AV_OPT_TYPE_INT,  { .i64 =  8 }, 4,   8, AE },
+    { "bitpool",      "bitpool value",
+      OFFSET(bitpool),      AV_OPT_TYPE_INT,  { .i64 = 32 }, 0, 255, AE },
+    { "blocks",       "number of blocks (4, 8, 12 or 16)",
+      OFFSET(blocks),       AV_OPT_TYPE_INT,  { .i64 = 16 }, 4,  16, AE },
+    { "snr",          "use SNR mode (instead of loudness)",
+      OFFSET(allocation),   AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { "msbc",         "use mSBC mode (wideband speech mono SBC)",
+      OFFSET(msbc),         AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { NULL },
+};
+
+/* AVClass that exposes the private option table of the SBC encoder */
+static const AVClass sbc_class = {
+    .version    = LIBAVUTIL_VERSION_INT,
+    .option     = options,
+    .item_name  = av_default_item_name,
+    .class_name = "sbc encoder",
+};
+
+/*
+ * Codec registration entry for the SBC encoder: signed 16-bit input,
+ * mono or stereo, at 16, 32, 44.1 or 48 kHz.
+ */
+AVCodec ff_sbc_encoder = {
+    .id                    = AV_CODEC_ID_SBC,
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .name                  = "sbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+    .priv_class            = &sbc_class,
+    .priv_data_size        = sizeof(SBCEncContext),
+    .init                  = sbc_encode_init,
+    .encode2               = sbc_encode_frame,
+    .capabilities          = AV_CODEC_CAP_SMALL_LAST_FRAME,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE,
+    .sample_fmts           = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
+    .channel_layouts       = (const uint64_t[]) {
+        AV_CH_LAYOUT_MONO, AV_CH_LAYOUT_STEREO, 0
+    },
+    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 },
+};