diff mbox

[FFmpeg-devel,1/3] sbc: implement SBC codec (low-complexity subband codec)

Message ID 20171105233520.15856-2-aurel@gnuage.org
State New
Headers show

Commit Message

Aurelien Jacobs Nov. 5, 2017, 11:35 p.m. UTC
This was originally based on libsbc, and was fully integrated into ffmpeg.
---
 doc/general.texi                 |   2 +
 libavcodec/Makefile              |   4 +
 libavcodec/allcodecs.c           |   2 +
 libavcodec/arm/Makefile          |   3 +
 libavcodec/arm/sbcdsp_armv6.S    | 245 ++++++++++++++
 libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
 libavcodec/arm/sbcdsp_neon.S     | 714 +++++++++++++++++++++++++++++++++++++++
 libavcodec/avcodec.h             |   2 +
 libavcodec/codec_desc.c          |  12 +
 libavcodec/sbc.c                 | 316 +++++++++++++++++
 libavcodec/sbc.h                 | 121 +++++++
 libavcodec/sbcdec.c              | 469 +++++++++++++++++++++++++
 libavcodec/sbcdec_data.c         | 127 +++++++
 libavcodec/sbcdec_data.h         |  44 +++
 libavcodec/sbcdsp.c              | 569 +++++++++++++++++++++++++++++++
 libavcodec/sbcdsp.h              |  86 +++++
 libavcodec/sbcdsp_data.c         | 335 ++++++++++++++++++
 libavcodec/sbcdsp_data.h         |  57 ++++
 libavcodec/sbcenc.c              | 461 +++++++++++++++++++++++++
 libavcodec/x86/Makefile          |   2 +
 libavcodec/x86/sbcdsp.asm        | 290 ++++++++++++++++
 libavcodec/x86/sbcdsp_init.c     |  51 +++
 22 files changed, 4017 insertions(+)
 create mode 100644 libavcodec/arm/sbcdsp_armv6.S
 create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
 create mode 100644 libavcodec/arm/sbcdsp_neon.S
 create mode 100644 libavcodec/sbc.c
 create mode 100644 libavcodec/sbc.h
 create mode 100644 libavcodec/sbcdec.c
 create mode 100644 libavcodec/sbcdec_data.c
 create mode 100644 libavcodec/sbcdec_data.h
 create mode 100644 libavcodec/sbcdsp.c
 create mode 100644 libavcodec/sbcdsp.h
 create mode 100644 libavcodec/sbcdsp_data.c
 create mode 100644 libavcodec/sbcdsp_data.h
 create mode 100644 libavcodec/sbcenc.c
 create mode 100644 libavcodec/x86/sbcdsp.asm
 create mode 100644 libavcodec/x86/sbcdsp_init.c

Comments

Michael Niedermayer Nov. 6, 2017, 3:22 a.m. UTC | #1
Hi 

On Mon, Nov 06, 2017 at 12:35:18AM +0100, Aurelien Jacobs wrote:
> This was originally based on libsbc, and was fully integrated into ffmpeg.
> ---
>  doc/general.texi                 |   2 +
>  libavcodec/Makefile              |   4 +
>  libavcodec/allcodecs.c           |   2 +
>  libavcodec/arm/Makefile          |   3 +
>  libavcodec/arm/sbcdsp_armv6.S    | 245 ++++++++++++++
>  libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
>  libavcodec/arm/sbcdsp_neon.S     | 714 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/avcodec.h             |   2 +
>  libavcodec/codec_desc.c          |  12 +
>  libavcodec/sbc.c                 | 316 +++++++++++++++++
>  libavcodec/sbc.h                 | 121 +++++++
>  libavcodec/sbcdec.c              | 469 +++++++++++++++++++++++++
>  libavcodec/sbcdec_data.c         | 127 +++++++
>  libavcodec/sbcdec_data.h         |  44 +++
>  libavcodec/sbcdsp.c              | 569 +++++++++++++++++++++++++++++++
>  libavcodec/sbcdsp.h              |  86 +++++
>  libavcodec/sbcdsp_data.c         | 335 ++++++++++++++++++
>  libavcodec/sbcdsp_data.h         |  57 ++++
>  libavcodec/sbcenc.c              | 461 +++++++++++++++++++++++++
>  libavcodec/x86/Makefile          |   2 +
>  libavcodec/x86/sbcdsp.asm        | 290 ++++++++++++++++
>  libavcodec/x86/sbcdsp_init.c     |  51 +++
>  22 files changed, 4017 insertions(+)
>  create mode 100644 libavcodec/arm/sbcdsp_armv6.S
>  create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
>  create mode 100644 libavcodec/arm/sbcdsp_neon.S
>  create mode 100644 libavcodec/sbc.c
>  create mode 100644 libavcodec/sbc.h
>  create mode 100644 libavcodec/sbcdec.c
>  create mode 100644 libavcodec/sbcdec_data.c
>  create mode 100644 libavcodec/sbcdec_data.h
>  create mode 100644 libavcodec/sbcdsp.c
>  create mode 100644 libavcodec/sbcdsp.h
>  create mode 100644 libavcodec/sbcdsp_data.c
>  create mode 100644 libavcodec/sbcdsp_data.h
>  create mode 100644 libavcodec/sbcenc.c
>  create mode 100644 libavcodec/x86/sbcdsp.asm
>  create mode 100644 libavcodec/x86/sbcdsp_init.c

this seems to fail to build on x86-32

        libavcodec/x86/sbcdsp_init.o
src/libavcodec/x86/sbcdsp.asm:251: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:264: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:267: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:269: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:270: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:271: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:273: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:274: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:275: error: invalid operands in non-64-bit mode
src/libavcodec/x86/sbcdsp.asm:276: error: invalid operands in non-64-bit mode
STRIP   libavcodec/x86/opus_pvq_search.o


[...]
Rostislav Pehlivanov Nov. 6, 2017, 4:40 a.m. UTC | #2
On 5 November 2017 at 23:35, Aurelien Jacobs <aurel@gnuage.org> wrote:

> This was originally based on libsbc, and was fully integrated into ffmpeg.
> ---
>  doc/general.texi                 |   2 +
>  libavcodec/Makefile              |   4 +
>  libavcodec/allcodecs.c           |   2 +
>  libavcodec/arm/Makefile          |   3 +
>  libavcodec/arm/sbcdsp_armv6.S    | 245 ++++++++++++++
>  libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
>  libavcodec/arm/sbcdsp_neon.S     | 714 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/avcodec.h             |   2 +
>  libavcodec/codec_desc.c          |  12 +
>  libavcodec/sbc.c                 | 316 +++++++++++++++++
>  libavcodec/sbc.h                 | 121 +++++++
>  libavcodec/sbcdec.c              | 469 +++++++++++++++++++++++++
>  libavcodec/sbcdec_data.c         | 127 +++++++
>  libavcodec/sbcdec_data.h         |  44 +++
>  libavcodec/sbcdsp.c              | 569 +++++++++++++++++++++++++++++++
>  libavcodec/sbcdsp.h              |  86 +++++
>  libavcodec/sbcdsp_data.c         | 335 ++++++++++++++++++
>  libavcodec/sbcdsp_data.h         |  57 ++++
>  libavcodec/sbcenc.c              | 461 +++++++++++++++++++++++++
>  libavcodec/x86/Makefile          |   2 +
>  libavcodec/x86/sbcdsp.asm        | 290 ++++++++++++++++
>  libavcodec/x86/sbcdsp_init.c     |  51 +++
>  22 files changed, 4017 insertions(+)
>  create mode 100644 libavcodec/arm/sbcdsp_armv6.S
>  create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
>  create mode 100644 libavcodec/arm/sbcdsp_neon.S
>  create mode 100644 libavcodec/sbc.c
>  create mode 100644 libavcodec/sbc.h
>  create mode 100644 libavcodec/sbcdec.c
>  create mode 100644 libavcodec/sbcdec_data.c
>  create mode 100644 libavcodec/sbcdec_data.h
>  create mode 100644 libavcodec/sbcdsp.c
>  create mode 100644 libavcodec/sbcdsp.h
>  create mode 100644 libavcodec/sbcdsp_data.c
>  create mode 100644 libavcodec/sbcdsp_data.h
>  create mode 100644 libavcodec/sbcenc.c
>  create mode 100644 libavcodec/x86/sbcdsp.asm
>  create mode 100644 libavcodec/x86/sbcdsp_init.c
>
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index c4134424f0..2d541bf64a 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -632,6 +632,8 @@ enum AVCodecID {
>      AV_CODEC_ID_ATRAC3AL,
>      AV_CODEC_ID_ATRAC3PAL,
>      AV_CODEC_ID_DOLBY_E,
> +    AV_CODEC_ID_SBC,
> +    AV_CODEC_ID_MSBC,
>
>
See below.


>      /* subtitle codecs */
>      AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
> diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
> index 92bf1d2681..8d613507e0 100644
> --- a/libavcodec/codec_desc.c
> +++ b/libavcodec/codec_desc.c
> @@ -2859,6 +2859,18 @@ static const AVCodecDescriptor codec_descriptors[] = {
>          .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
>          .props     = AV_CODEC_PROP_LOSSY,
>      },
> +    {
> +        .id        = AV_CODEC_ID_SBC,
> +        .type      = AVMEDIA_TYPE_AUDIO,
> +        .name      = "sbc",
> +        .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
> +    },
> +    {
> +        .id        = AV_CODEC_ID_MSBC,
> +        .type      = AVMEDIA_TYPE_AUDIO,
> +        .name      = "msbc",
> +        .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
> +    },
>

Is there a bitstream difference between the two? I don't think so, so you
should instead define FF_PROFILE_SBC_WB and use a single codec ID.


>
>      /* subtitle codecs */
>      {
> diff --git a/libavcodec/sbc.c b/libavcodec/sbc.c
> new file mode 100644
> index 0000000000..99d02cc56a
> --- /dev/null
> +++ b/libavcodec/sbc.c
> @@ -0,0 +1,316 @@
> +/*
> + * Bluetooth low-complexity, subband codec (SBC)
> + *
> + * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
> + * Copyright (C) 2012-2013  Intel Corporation
> + * Copyright (C) 2008-2010  Nokia Corporation
> + * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
> + * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
> + * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * SBC common functions for the encoder and decoder
> + */
> +
> +#include "avcodec.h"
> +#include "sbc.h"
> +
> +/*
> + * Calculates the CRC-8 of the first len bits in data
> + */
> +static const uint8_t crc_table[256] = {
> +    0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53,
> +    0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB,
> +    0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E,
> +    0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76,
> +    0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4,
> +    0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C,
> +    0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19,
> +    0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1,
> +    0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40,
> +    0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8,
> +    0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D,
> +    0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65,
> +    0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7,
> +    0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F,
> +    0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A,
> +    0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2,
> +    0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75,
> +    0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D,
> +    0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8,
> +    0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50,
> +    0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2,
> +    0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A,
> +    0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F,
> +    0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7,
> +    0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66,
> +    0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E,
> +    0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB,
> +    0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43,
> +    0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1,
> +    0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09,
> +    0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C,
> +    0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4
> +};
> +
> +uint8_t ff_sbc_crc8(const uint8_t *data, size_t len)
> +{
> +    uint8_t crc = 0x0f;
> +    size_t i;
> +    uint8_t octet;
> +
> +    for (i = 0; i < len / 8; i++)
> +        crc = crc_table[crc ^ data[i]];
> +
> +    octet = data[i];
> +    for (i = 0; i < len % 8; i++) {
> +        char bit = ((octet ^ crc) & 0x80) >> 7;
> +
> +        crc = ((crc & 0x7f) << 1) ^ (bit ? 0x1d : 0);
> +
> +        octet = octet << 1;
> +    }
> +
> +    return crc;
> +}
>


We have CRC functions already, look in libavutil/crc.h



> +                        if (subbands == 4)
> +                            loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
> +                        else
> +                            loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
> +                        if (loudness > 0)
> +                            bitneed[ch][sb] = loudness / 2;
> +                        else
> +                            bitneed[ch][sb] = loudness;
>


bitneed[ch][sb] = loudness >> (loudness > 0);



>
> +
> +static int sbc_decode_frame(AVCodecContext *avctx,
> +                            void *data, int *got_frame_ptr,
> +                            AVPacket *avpkt)
> +{
> +    SBCDecContext *sbc = avctx->priv_data;
> +    int i, ch, samples, ret;
> +    AVFrame *frame = data;
> +    int16_t *ptr;
> +
> +    if (!sbc)
> +        return AVERROR(EIO);
> +
> +    sbc->frame.length = sbc->unpack_frame(avpkt->data, &sbc->frame, avpkt->size);
> +    if (sbc->frame.length <= 0)
> +        return sbc->frame.length;
> +
> +    samples = sbc_synthesize_audio(&sbc->dsp, &sbc->frame);
> +
> +    frame->nb_samples = samples;
> +    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> +        return ret;
> +    ptr = (int16_t *)frame->data[0];
> +
> +    for (i = 0; i < samples; i++)
> +        for (ch = 0; ch < sbc->frame.channels; ch++)
> +            *ptr++ = sbc->frame.pcm_sample[ch][i];
> +
>

Once again, use planar sample formats



> +    *got_frame_ptr = 1;
> +
> +    return sbc->frame.length;
> +}
> +
> +#if CONFIG_SBC_DECODER
> +AVCodec ff_sbc_decoder = {
> +    .name                  = "sbc",
> +    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
> +    .type                  = AVMEDIA_TYPE_AUDIO,
> +    .id                    = AV_CODEC_ID_SBC,
> +    .priv_data_size        = sizeof(SBCDecContext),
> +    .init                  = sbc_decode_init,
> +    .decode                = sbc_decode_frame,
> +    .capabilities          = AV_CODEC_CAP_DR1,
> +    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
> +                                                  AV_CH_LAYOUT_STEREO, 0},
> +    .sample_fmts           = (const enum AVSampleFormat[]) {
> AV_SAMPLE_FMT_S16,
>


AV_SAMPLE_FMT_S16P



> +
>  AV_SAMPLE_FMT_NONE },
> +    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 },
> +};
> +#endif
> +
> +#if CONFIG_MSBC_DECODER
> +AVCodec ff_msbc_decoder = {
> +    .name                  = "msbc",
> +    .long_name             = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
> +    .type                  = AVMEDIA_TYPE_AUDIO,
> +    .id                    = AV_CODEC_ID_MSBC,
> +    .priv_data_size        = sizeof(SBCDecContext),
> +    .init                  = msbc_decode_init,
> +    .decode                = sbc_decode_frame,
> +    .capabilities          = AV_CODEC_CAP_DR1,
> +    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0},
> +    .sample_fmts           = (const enum AVSampleFormat[]) {
> AV_SAMPLE_FMT_S16,
>


AV_SAMPLE_FMT_S16P



> +
> +/*
> + * A reference C code of analysis filter with SIMD-friendly tables
> + * reordering and code layout. This code can be used to develop platform
> + * specific SIMD optimizations. Also it may be used as some kind of test
> + * for compiler autovectorization capabilities (who knows, if the compiler
> + * is very good at this stuff, hand optimized assembly may be not strictly
> + * needed for some platform).
> + *
> + * Note: It is also possible to make a simple variant of analysis filter,
> + * which needs only a single constants table without taking care about
> + * even/odd cases. This simple variant of filter can be implemented without
> + * input data permutation. The only thing that would be lost is the
> + * possibility to use pairwise SIMD multiplications. But for some simple
> + * CPU cores without SIMD extensions it can be useful. If anybody is
> + * interested in implementing such variant of a filter, sourcecode from
> + * bluez versions 4.26/4.27 can be used as a reference and the history of
> + * the changes in git repository done around that time may be worth checking.
> + */
> +
> +static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
> +                               const int16_t *consts)
> +{
> +    int32_t t1[4];
> +    int16_t t2[4];
> +    int hop = 0;
> +
> +    /* rounding coefficient */
> +    t1[0] = t1[1] = t1[2] = t1[3] =
> +        (int32_t) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
> +
> +    /* low pass polyphase filter */
> +    for (hop = 0; hop < 40; hop += 8) {
> +        t1[0] += (int32_t) in[hop] * consts[hop];
> +        t1[0] += (int32_t) in[hop + 1] * consts[hop + 1];
> +        t1[1] += (int32_t) in[hop + 2] * consts[hop + 2];
> +        t1[1] += (int32_t) in[hop + 3] * consts[hop + 3];
> +        t1[2] += (int32_t) in[hop + 4] * consts[hop + 4];
> +        t1[2] += (int32_t) in[hop + 5] * consts[hop + 5];
> +        t1[3] += (int32_t) in[hop + 6] * consts[hop + 6];
> +        t1[3] += (int32_t) in[hop + 7] * consts[hop + 7];
> +    }
> +
> +    /* scaling */
> +    t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE;
> +    t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE;
> +    t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE;
> +    t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE;
> +
> +    /* do the cos transform */
> +    t1[0]  = (int32_t) t2[0] * consts[40 + 0];
> +    t1[0] += (int32_t) t2[1] * consts[40 + 1];
> +    t1[1]  = (int32_t) t2[0] * consts[40 + 2];
> +    t1[1] += (int32_t) t2[1] * consts[40 + 3];
> +    t1[2]  = (int32_t) t2[0] * consts[40 + 4];
> +    t1[2] += (int32_t) t2[1] * consts[40 + 5];
> +    t1[3]  = (int32_t) t2[0] * consts[40 + 6];
> +    t1[3] += (int32_t) t2[1] * consts[40 + 7];
> +
> +    t1[0] += (int32_t) t2[2] * consts[40 + 8];
> +    t1[0] += (int32_t) t2[3] * consts[40 + 9];
> +    t1[1] += (int32_t) t2[2] * consts[40 + 10];
> +    t1[1] += (int32_t) t2[3] * consts[40 + 11];
> +    t1[2] += (int32_t) t2[2] * consts[40 + 12];
> +    t1[2] += (int32_t) t2[3] * consts[40 + 13];
> +    t1[3] += (int32_t) t2[2] * consts[40 + 14];
> +    t1[3] += (int32_t) t2[3] * consts[40 + 15];
> +
> +    out[0] = t1[0] >>
> +        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
> +    out[1] = t1[1] >>
> +        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
> +    out[2] = t1[2] >>
> +        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
> +    out[3] = t1[3] >>
> +        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
> +}
> +
> +static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
> +                               const int16_t *consts)
> +{
> +    int32_t t1[8];
> +    int16_t t2[8];
> +    int i, hop;
> +
> +    /* rounding coefficient */
> +    t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] =
> +        (int32_t) 1 << (SBC_PROTO_FIXED8_SCALE-1);
> +
> +    /* low pass polyphase filter */
> +    for (hop = 0; hop < 80; hop += 16) {
> +        t1[0] += (int32_t) in[hop] * consts[hop];
> +        t1[0] += (int32_t) in[hop + 1] * consts[hop + 1];
> +        t1[1] += (int32_t) in[hop + 2] * consts[hop + 2];
> +        t1[1] += (int32_t) in[hop + 3] * consts[hop + 3];
> +        t1[2] += (int32_t) in[hop + 4] * consts[hop + 4];
> +        t1[2] += (int32_t) in[hop + 5] * consts[hop + 5];
> +        t1[3] += (int32_t) in[hop + 6] * consts[hop + 6];
> +        t1[3] += (int32_t) in[hop + 7] * consts[hop + 7];
> +        t1[4] += (int32_t) in[hop + 8] * consts[hop + 8];
> +        t1[4] += (int32_t) in[hop + 9] * consts[hop + 9];
> +        t1[5] += (int32_t) in[hop + 10] * consts[hop + 10];
> +        t1[5] += (int32_t) in[hop + 11] * consts[hop + 11];
> +        t1[6] += (int32_t) in[hop + 12] * consts[hop + 12];
> +        t1[6] += (int32_t) in[hop + 13] * consts[hop + 13];
> +        t1[7] += (int32_t) in[hop + 14] * consts[hop + 14];
> +        t1[7] += (int32_t) in[hop + 15] * consts[hop + 15];
> +    }
> +
> +    /* scaling */
> +    t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE;
> +    t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE;
> +    t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE;
> +    t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE;
> +    t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE;
> +    t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE;
> +    t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE;
> +    t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE;
> +
> +
> +    /* do the cos transform */
> +    t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0;
> +
> +    for (i = 0; i < 4; i++) {
> +        t1[0] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 0];
> +        t1[0] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 1];
> +        t1[1] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 2];
> +        t1[1] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 3];
> +        t1[2] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 4];
> +        t1[2] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 5];
> +        t1[3] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 6];
> +        t1[3] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 7];
> +        t1[4] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 8];
> +        t1[4] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 9];
> +        t1[5] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 10];
> +        t1[5] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 11];
> +        t1[6] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 12];
> +        t1[6] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 13];
> +        t1[7] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 14];
> +        t1[7] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 15];
> +    }
> +
> +    for (i = 0; i < 8; i++)
> +        out[i] = t1[i] >>
> +            (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
> +}
> +
>


What does it do here? A PQF (polyphase quadrature filter) feeding into an FFT?
I might investigate using lavc's fixed-point MDCT for this; I hate
custom fixed-point analysis transforms.



> +static inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
> +                                          int16_t *x, int32_t *out, int out_stride)
> +{
> +    /* Analyze blocks */
> +    s->sbc_analyze_4(x + 12, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
> +    out += out_stride;
> +    s->sbc_analyze_4(x + 8, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
> +    out += out_stride;
> +    s->sbc_analyze_4(x + 4, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
> +    out += out_stride;
> +    s->sbc_analyze_4(x + 0, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
> +
> +    emms_c();
> +}
> +
> +static inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
> +                                          int16_t *x, int32_t *out, int out_stride)
> +{
> +    /* Analyze blocks */
> +    s->sbc_analyze_8(x + 24, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
> +    out += out_stride;
> +    s->sbc_analyze_8(x + 16, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
> +    out += out_stride;
> +    s->sbc_analyze_8(x + 8, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
> +    out += out_stride;
> +    s->sbc_analyze_8(x + 0, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
> +
> +    emms_c();
> +}
> +
> +static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
> +                                               int16_t *x, int32_t *out,
> +                                               int out_stride);
> +
> +static inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
> +                                              int16_t *x, int32_t *out,
> +                                              int out_stride)
> +{
> +    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
> +    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_even;
> +
> +    emms_c();
> +}
> +
> +static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
> +                                               int16_t *x, int32_t *out,
> +                                               int out_stride)
> +{
> +    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
> +    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
> +
> +    emms_c();
> +}
> +
> +#define PCM(i)  AV_RN16(pcm + 2*(i))
>

Don't use a define, just substitute it directly.


> +
> +/*
> + * Internal helper functions for input data processing. In order to get
> + * optimal performance, it is important to have "nsamples" and "nchannels"
> + * arguments used with this inline function as compile time constants.
> + */
> +
> +static av_always_inline int sbc_encoder_process_input_s4_internal(
> +    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
> +    int nsamples, int nchannels)
> +{
> +    /* handle X buffer wraparound */
> +    if (position < nsamples) {
> +        if (nchannels > 0)
> +            memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position],
> +                            36 * sizeof(int16_t));
> +        if (nchannels > 1)
> +            memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position],
> +                            36 * sizeof(int16_t));
> +        position = SBC_X_BUFFER_SIZE - 40;
> +    }
> +
> +    /* copy/permutate audio samples */
> +    while ((nsamples -= 8) >= 0) {
> +        position -= 8;
> +        if (nchannels > 0) {
> +            int16_t *x = &X[0][position];
> +            x[0]  = PCM(0 + 7 * nchannels);
> +            x[1]  = PCM(0 + 3 * nchannels);
> +            x[2]  = PCM(0 + 6 * nchannels);
> +            x[3]  = PCM(0 + 4 * nchannels);
> +            x[4]  = PCM(0 + 0 * nchannels);
> +            x[5]  = PCM(0 + 2 * nchannels);
> +            x[6]  = PCM(0 + 1 * nchannels);
> +            x[7]  = PCM(0 + 5 * nchannels);
> +        }
> +        if (nchannels > 1) {
> +            int16_t *x = &X[1][position];
> +            x[0]  = PCM(1 + 7 * nchannels);
> +            x[1]  = PCM(1 + 3 * nchannels);
> +            x[2]  = PCM(1 + 6 * nchannels);
> +            x[3]  = PCM(1 + 4 * nchannels);
> +            x[4]  = PCM(1 + 0 * nchannels);
> +            x[5]  = PCM(1 + 2 * nchannels);
> +            x[6]  = PCM(1 + 1 * nchannels);
> +            x[7]  = PCM(1 + 5 * nchannels);
> +        }
> +        pcm += 16 * nchannels;
> +    }
> +
> +    return position;
> +}
> +
> +static av_always_inline int sbc_encoder_process_input_s8_internal(
> +    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
> +    int nsamples, int nchannels)
> +{
> +    /* handle X buffer wraparound */
> +    if (position < nsamples) {
> +        if (nchannels > 0)
> +            memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position],
> +                            72 * sizeof(int16_t));
> +        if (nchannels > 1)
> +            memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position],
> +                            72 * sizeof(int16_t));
> +        position = SBC_X_BUFFER_SIZE - 72;
> +    }
> +
> +    if (position % 16 == 8) {
> +        position -= 8;
> +        nsamples -= 8;
> +        if (nchannels > 0) {
> +            int16_t *x = &X[0][position];
> +            x[0]  = PCM(0 + (15-8) * nchannels);
> +            x[2]  = PCM(0 + (14-8) * nchannels);
> +            x[3]  = PCM(0 + (8-8) * nchannels);
> +            x[4]  = PCM(0 + (13-8) * nchannels);
> +            x[5]  = PCM(0 + (9-8) * nchannels);
> +            x[6]  = PCM(0 + (12-8) * nchannels);
> +            x[7]  = PCM(0 + (10-8) * nchannels);
> +            x[8]  = PCM(0 + (11-8) * nchannels);
> +        }
> +        if (nchannels > 1) {
> +            int16_t *x = &X[1][position];
> +            x[0]  = PCM(1 + (15-8) * nchannels);
> +            x[2]  = PCM(1 + (14-8) * nchannels);
> +            x[3]  = PCM(1 + (8-8) * nchannels);
> +            x[4]  = PCM(1 + (13-8) * nchannels);
> +            x[5]  = PCM(1 + (9-8) * nchannels);
> +            x[6]  = PCM(1 + (12-8) * nchannels);
> +            x[7]  = PCM(1 + (10-8) * nchannels);
> +            x[8]  = PCM(1 + (11-8) * nchannels);
> +        }
> +
> +        pcm += 16 * nchannels;
> +    }
> +
> +    /* copy/permutate audio samples */
> +    while (nsamples >= 16) {
> +        position -= 16;
> +        if (nchannels > 0) {
> +            int16_t *x = &X[0][position];
> +            x[0]  = PCM(0 + 15 * nchannels);
> +            x[1]  = PCM(0 + 7 * nchannels);
> +            x[2]  = PCM(0 + 14 * nchannels);
> +            x[3]  = PCM(0 + 8 * nchannels);
> +            x[4]  = PCM(0 + 13 * nchannels);
> +            x[5]  = PCM(0 + 9 * nchannels);
> +            x[6]  = PCM(0 + 12 * nchannels);
> +            x[7]  = PCM(0 + 10 * nchannels);
> +            x[8]  = PCM(0 + 11 * nchannels);
> +            x[9]  = PCM(0 + 3 * nchannels);
> +            x[10] = PCM(0 + 6 * nchannels);
> +            x[11] = PCM(0 + 0 * nchannels);
> +            x[12] = PCM(0 + 5 * nchannels);
> +            x[13] = PCM(0 + 1 * nchannels);
> +            x[14] = PCM(0 + 4 * nchannels);
> +            x[15] = PCM(0 + 2 * nchannels);
> +        }
> +        if (nchannels > 1) {
> +            int16_t *x = &X[1][position];
> +            x[0]  = PCM(1 + 15 * nchannels);
> +            x[1]  = PCM(1 + 7 * nchannels);
> +            x[2]  = PCM(1 + 14 * nchannels);
> +            x[3]  = PCM(1 + 8 * nchannels);
> +            x[4]  = PCM(1 + 13 * nchannels);
> +            x[5]  = PCM(1 + 9 * nchannels);
> +            x[6]  = PCM(1 + 12 * nchannels);
> +            x[7]  = PCM(1 + 10 * nchannels);
> +            x[8]  = PCM(1 + 11 * nchannels);
> +            x[9]  = PCM(1 + 3 * nchannels);
> +            x[10] = PCM(1 + 6 * nchannels);
> +            x[11] = PCM(1 + 0 * nchannels);
> +            x[12] = PCM(1 + 5 * nchannels);
> +            x[13] = PCM(1 + 1 * nchannels);
> +            x[14] = PCM(1 + 4 * nchannels);
> +            x[15] = PCM(1 + 2 * nchannels);
> +        }
> +        pcm += 32 * nchannels;
> +        nsamples -= 16;
> +    }
> +
> +    if (nsamples == 8) {
> +        position -= 8;
> +        if (nchannels > 0) {
> +            int16_t *x = &X[0][position];
> +            x[-7] = PCM(0 + 7 * nchannels);
> +            x[1]  = PCM(0 + 3 * nchannels);
> +            x[2]  = PCM(0 + 6 * nchannels);
> +            x[3]  = PCM(0 + 0 * nchannels);
> +            x[4]  = PCM(0 + 5 * nchannels);
> +            x[5]  = PCM(0 + 1 * nchannels);
> +            x[6]  = PCM(0 + 4 * nchannels);
> +            x[7]  = PCM(0 + 2 * nchannels);
> +        }
> +        if (nchannels > 1) {
> +            int16_t *x = &X[1][position];
> +            x[-7] = PCM(1 + 7 * nchannels);
> +            x[1]  = PCM(1 + 3 * nchannels);
> +            x[2]  = PCM(1 + 6 * nchannels);
> +            x[3]  = PCM(1 + 0 * nchannels);
> +            x[4]  = PCM(1 + 5 * nchannels);
> +            x[5]  = PCM(1 + 1 * nchannels);
> +            x[6]  = PCM(1 + 4 * nchannels);
> +            x[7]  = PCM(1 + 2 * nchannels);
> +        }
> +    }
> +
> +    return position;
> +}
> +
> +/*
> + * Input data processing functions. The data is endian converted if
> needed,
> + * channels are deintrleaved and audio samples are reordered for use in
> + * SIMD-friendly analysis filter function. The results are put into "X"
> + * array, getting appended to the previous data (or it is better to say
> + * prepended, as the buffer is filled from top to bottom). Old data is
> + * discarded when needed, but availability of (10 * nrof_subbands)
> + * contiguous samples is always guaranteed for the input to the analysis
> + * filter. This is achieved by copying a sufficient part of old data
> + * to the top of the buffer on buffer wraparound.
> + */
> +
> +static int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
> +                                    int16_t X[2][SBC_X_BUFFER_SIZE],
> +                                    int nsamples, int nchannels)
> +{
> +    if (nchannels > 1)
> +        return sbc_encoder_process_input_s4_internal(
> +            position, pcm, X, nsamples, 2);
> +    else
> +        return sbc_encoder_process_input_s4_internal(
> +            position, pcm, X, nsamples, 1);
> +}
>

That's just silly, do
return sbc_encoder_process_input_s4_internal(position, pcm, X, nsamples, 1
+ (nchannels > 1));

Or better yet remove the wrapper function.



> +
> +static int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
> +                                    int16_t X[2][SBC_X_BUFFER_SIZE],
> +                                    int nsamples, int nchannels)
> +{
> +    if (nchannels > 1)
> +        return sbc_encoder_process_input_s8_internal(
> +            position, pcm, X, nsamples, 2);
> +    else
> +        return sbc_encoder_process_input_s8_internal(
> +            position, pcm, X, nsamples, 1);
> +}
> +
>

Same here.


>
> +++ b/libavcodec/sbcdsp_data.c
> @@ -0,0 +1,335 @@
> +/*
> + * Bluetooth low-complexity, subband codec (SBC)
> + *
> + * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
> + * Copyright (C) 2008-2010  Nokia Corporation
> + * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
> + * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
> + * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * miscellaneous SBC tables
> + */
> +
> +#include "sbcdsp_data.h"
> +
> +#define F_PROTO4(x) (int32_t) ((x * 2) * \
> +    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
> +#define F_COS4(x) (int32_t) ((x) * \
> +    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
> +#define F_PROTO8(x) (int32_t) ((x * 2) * \
> +    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
> +#define F_COS8(x) (int32_t) ((x) * \
> +    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
> +
>


We require 8 bit bytes, so s/CHAR_BIT/8/g throughout.


+++ b/libavcodec/sbcdsp_data.h
> @@ -0,0 +1,57 @@
> +/*
> + * Bluetooth low-complexity, subband codec (SBC)
> + *
> + * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
> + * Copyright (C) 2008-2010  Nokia Corporation
> + * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
> + * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
> + * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * miscellaneous SBC tables
> + */
> +
> +#ifndef AVCODEC_SBCDSP_DATA_H
> +#define AVCODEC_SBCDSP_DATA_H
> +
> +#include "sbc.h"
> +
> +#define SBC_PROTO_FIXED4_SCALE      ((sizeof(int16_t) * CHAR_BIT - 1) + 1)
> +#define SBC_COS_TABLE_FIXED4_SCALE  ((sizeof(int16_t) * CHAR_BIT - 1)    )
> +#define SBC_PROTO_FIXED8_SCALE      ((sizeof(int16_t) * CHAR_BIT - 1) + 1)
> +#define SBC_COS_TABLE_FIXED8_SCALE  ((sizeof(int16_t) * CHAR_BIT - 1)    )
> +
>

Same


>
> +
> +    /* align the last crc byte */
> +    if (crc_pos % 8)
> +        crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8);
>
>
put_bits_align?


> +    avpkt->data[3] = ff_sbc_crc8(crc_header, crc_pos);
> +
> +    ff_sbc_calculate_bits(frame, bits);
> +
> +    for (ch = 0; ch < frame_channels; ch++) {
> +        for (sb = 0; sb < frame_subbands; sb++) {
> +            levels[ch][sb] = ((1 << bits[ch][sb]) - 1) <<
> +                (32 - (frame->scale_factor[ch][sb] +
> +                    SCALE_OUT_BITS + 2));
> +            sb_sample_delta[ch][sb] = (uint32_t) 1 <<
> +                (frame->scale_factor[ch][sb] +
> +                    SCALE_OUT_BITS + 1);
> +        }
> +    }
> +
> +    for (blk = 0; blk < frame->blocks; blk++) {
> +        for (ch = 0; ch < frame_channels; ch++) {
> +            for (sb = 0; sb < frame_subbands; sb++) {
> +
> +                if (bits[ch][sb] == 0)
> +                    continue;
> +
> +                audio_sample = ((uint64_t) levels[ch][sb] *
> +                    (sb_sample_delta[ch][sb] +
> +                    frame->sb_sample_f[blk][ch][sb])) >> 32;
> +
> +                put_bits(&pb, bits[ch][sb], audio_sample);
> +            }
> +        }
> +    }
> +
> +    flush_put_bits(&pb);
> +
> +    return (put_bits_count(&pb) + 7) / 8;
> +}
> +
> +static size_t sbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame,
> int joint)
> +{
> +    int frame_subbands = 4;
> +
> +    avpkt->data[0] = SBC_SYNCWORD;
> +
> +    avpkt->data[1] = (frame->frequency & 0x03) << 6;
> +    avpkt->data[1] |= (frame->block_mode & 0x03) << 4;
> +    avpkt->data[1] |= (frame->mode & 0x03) << 2;
> +    avpkt->data[1] |= (frame->allocation & 0x01) << 1;
> +
>

Use put_bits?


> +
> +    if (frame->subbands == 4) {
> +        if (frame->channels == 1)
> +            return sbc_pack_frame_internal(avpkt, frame, 4, 1, joint);
>

return sbc_pack_frame_internal(avpkt, frame, 4, 1 + (frame->channels != 1),
joint);


> +            return sbc_pack_frame_internal(avpkt, frame, 8, 1, joint);
> +        else
> +            return sbc_pack_frame_internal(avpkt, frame, 8, 2, joint);
>

return sbc_pack_frame_internal(avpkt, frame, 8, 1 + (frame->channels != 1),
joint);


> +    }
> +}
> +
> +static size_t msbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame,
> int joint)
> +{
> +    avpkt->data[0] = MSBC_SYNCWORD;
> +    avpkt->data[1] = 0;
> +    avpkt->data[2] = 0;
> +
> +    return sbc_pack_frame_internal(avpkt, frame, 8, 1, joint);
> +}
> +
> +static void sbc_encoder_init(bool msbc, SBCDSPContext *s,
> +                             const struct sbc_frame *frame)
> +{
> +    memset(&s->X, 0, sizeof(s->X));
> +    s->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7;
> +    if (msbc)
> +        s->increment = 1;
> +    else
> +        s->increment = 4;

+
>

Save a line, use a ternary.


> +
> +    sbc->pack_frame = sbc_pack_frame;
> +
> +    sbc->frequency = SBC_FREQ_44100;
>


Yet in the AVCodec structure the encoder specifies it supports 16 kHz, 32 kHz
and 48 kHz.
You should remove the SBC_FREQ macros and use avctx->sample_rate directly.
Also remove any unsupported samplerates.



> +    sbc->mode = SBC_MODE_STEREO;
> +    if (sbc->joint_stereo)
> +        sbc->mode = SBC_MODE_JOINT_STEREO;
> +    else if (sbc->dual_channel)
> +        sbc->mode = SBC_MODE_DUAL_CHANNEL;
> +    sbc->subbands >>= 3;
> +    sbc->blocks = (sbc->blocks >> 2) - 1;
> +
> +    if (!avctx->frame_size)
> +        avctx->frame_size = 4*(sbc->subbands + 1) * 4*(sbc->blocks + 1);
> +
> +    for (int i = 0; avctx->codec->supported_samplerates[i]; i++)
> +        if (avctx->sample_rate == avctx->codec->supported_samplerates[i])
> +            sbc->frequency = i;
> +
> +    if (avctx->channels == 1)
> +        sbc->mode = SBC_MODE_MONO;
> +
> +    return 0;
> +}
> +
> +static int msbc_encode_init(AVCodecContext *avctx)
> +{
> +    SBCEncContext *sbc = avctx->priv_data;
> +
> +    sbc->msbc = true;
> +    sbc->pack_frame = msbc_pack_frame;
> +
> +    sbc->frequency = SBC_FREQ_16000;
> +    sbc->blocks = MSBC_BLOCKS;
> +    sbc->subbands = SBC_SB_8;
> +    sbc->mode = SBC_MODE_MONO;
> +    sbc->allocation = SBC_AM_LOUDNESS;
> +    sbc->bitpool = 26;
> +
> +    if (!avctx->frame_size)
> +        avctx->frame_size = 8 * MSBC_BLOCKS;
> +
>

Does the encoder actually accept arbitrary custom frame sizes?


>
> +
> +#if CONFIG_SBC_ENCODER
> +AVCodec ff_sbc_encoder = {
> +    .name                  = "sbc",
> +    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity
> subband codec)"),
> +    .type                  = AVMEDIA_TYPE_AUDIO,
> +    .id                    = AV_CODEC_ID_SBC,
> +    .priv_data_size        = sizeof(SBCEncContext),
> +    .init                  = sbc_encode_init,
> +    .encode2               = sbc_encode_frame,
> +    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
> +                                                  AV_CH_LAYOUT_STEREO, 0},
> +    .sample_fmts           = (const enum AVSampleFormat[]) {
> AV_SAMPLE_FMT_S16,
> +
>  AV_SAMPLE_FMT_NONE },
>

Planar?


> +    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000,
> 0 },
>

Remove the samplerates the encoder doesn't support.
Also, since the encoder doesn't init any global tables, add the internal codec
cap for thread-safe init to both this and the aptX encoders.


>
> +
> +;*******************************************************************
> +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t
> *consts);
> +;*******************************************************************
> +INIT_MMX mmx
> +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
> +    movq          m0, [inq]
> +    movq          m1, [inq+8]
> +    pmaddwd       m0, [constsq]
> +    pmaddwd       m1, [constsq+8]
> +    paddd         m0, [scale_mask]
> +    paddd         m1, [scale_mask]
> +
> +    movq          m2, [inq+16]
> +    movq          m3, [inq+24]
> +    pmaddwd       m2, [constsq+16]
> +    pmaddwd       m3, [constsq+24]
> +    paddd         m0, m2
> +    paddd         m1, m3
> +
> +    movq          m2, [inq+32]
> +    movq          m3, [inq+40]
> +    pmaddwd       m2, [constsq+32]
> +    pmaddwd       m3, [constsq+40]
> +    paddd         m0, m2
> +    paddd         m1, m3
> +
> +    movq          m2, [inq+48]
> +    movq          m3, [inq+56]
> +    pmaddwd       m2, [constsq+48]
> +    pmaddwd       m3, [constsq+56]
> +    paddd         m0, m2
> +    paddd         m1, m3
> +
> +    movq          m2, [inq+64]
> +    movq          m3, [inq+72]
> +    pmaddwd       m2, [constsq+64]
> +    pmaddwd       m3, [constsq+72]
> +    paddd         m0, m2
> +    paddd         m1, m3
> +
>

Loops?


> +    psrad         m0, 16    ; SBC_PROTO_FIXED4_SCALE
> +    psrad         m1, 16    ; SBC_PROTO_FIXED4_SCALE
> +    packssdw      m0, m0
> +    packssdw      m1, m1
> +
> +    movq          m2, m0
> +    pmaddwd       m0, [constsq+80]
> +    pmaddwd       m2, [constsq+88]
> +
> +    movq          m3, m1
> +    pmaddwd       m1, [constsq+96]
> +    pmaddwd       m3, [constsq+104]
> +    paddd         m0, m1
> +    paddd         m2, m3
> +
> +    movq          [outq  ], m0
> +    movq          [outq+8], m2
> +
> +    RET
> +
> +
> +
> +;*******************************************************************
> +;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t
> *consts);
> +;*******************************************************************
> +INIT_MMX mmx
> +cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
> +    movq          m0, [inq]
> +    movq          m1, [inq+8]
> +    movq          m2, [inq+16]
> +    movq          m3, [inq+24]
> +    pmaddwd       m0, [constsq]
> +    pmaddwd       m1, [constsq+8]
> +    pmaddwd       m2, [constsq+16]
> +    pmaddwd       m3, [constsq+24]
> +    paddd         m0, [scale_mask]
> +    paddd         m1, [scale_mask]
> +    paddd         m2, [scale_mask]
> +    paddd         m3, [scale_mask]
> +
> +    movq          m4, [inq+32]
> +    movq          m5, [inq+40]
> +    movq          m6, [inq+48]
> +    movq          m7, [inq+56]
> +    pmaddwd       m4, [constsq+32]
> +    pmaddwd       m5, [constsq+40]
> +    pmaddwd       m6, [constsq+48]
> +    pmaddwd       m7, [constsq+56]
> +    paddd         m0, m4
> +    paddd         m1, m5
> +    paddd         m2, m6
> +    paddd         m3, m7
> +
> +    movq          m4, [inq+64]
> +    movq          m5, [inq+72]
> +    movq          m6, [inq+80]
> +    movq          m7, [inq+88]
> +    pmaddwd       m4, [constsq+64]
> +    pmaddwd       m5, [constsq+72]
> +    pmaddwd       m6, [constsq+80]
> +    pmaddwd       m7, [constsq+88]
> +    paddd         m0, m4
> +    paddd         m1, m5
> +    paddd         m2, m6
> +    paddd         m3, m7
> +
> +    movq          m4, [inq+96]
> +    movq          m5, [inq+104]
> +    movq          m6, [inq+112]
> +    movq          m7, [inq+120]
> +    pmaddwd       m4, [constsq+96]
> +    pmaddwd       m5, [constsq+104]
> +    pmaddwd       m6, [constsq+112]
> +    pmaddwd       m7, [constsq+120]
> +    paddd         m0, m4
> +    paddd         m1, m5
> +    paddd         m2, m6
> +    paddd         m3, m7
> +
> +    movq          m4, [inq+128]
> +    movq          m5, [inq+136]
> +    movq          m6, [inq+144]
> +    movq          m7, [inq+152]
> +    pmaddwd       m4, [constsq+128]
> +    pmaddwd       m5, [constsq+136]
> +    pmaddwd       m6, [constsq+144]
> +    pmaddwd       m7, [constsq+152]
> +    paddd         m0, m4
> +    paddd         m1, m5
> +    paddd         m2, m6
> +    paddd         m3, m7
> +
> +    psrad         m0, 16    ; SBC_PROTO_FIXED8_SCALE
> +    psrad         m1, 16    ; SBC_PROTO_FIXED8_SCALE
> +    psrad         m2, 16    ; SBC_PROTO_FIXED8_SCALE
> +    psrad         m3, 16    ; SBC_PROTO_FIXED8_SCALE
> +
> +    packssdw      m0, m0
> +    packssdw      m1, m1
> +    packssdw      m2, m2
> +    packssdw      m3, m3
> +
> +    movq          m4, m0
> +    movq          m5, m0
> +    pmaddwd       m4, [constsq+160]
> +    pmaddwd       m5, [constsq+168]
> +
> +    movq          m6, m1
> +    movq          m7, m1
> +    pmaddwd       m6, [constsq+192]
> +    pmaddwd       m7, [constsq+200]
> +    paddd         m4, m6
> +    paddd         m5, m7
> +
> +    movq          m6, m2
> +    movq          m7, m2
> +    pmaddwd       m6, [constsq+224]
> +    pmaddwd       m7, [constsq+232]
> +    paddd         m4, m6
> +    paddd         m5, m7
> +
> +    movq          m6, m3
> +    movq          m7, m3
> +    pmaddwd       m6, [constsq+256]
> +    pmaddwd       m7, [constsq+264]
> +    paddd         m4, m6
> +    paddd         m5, m7
> +
> +    movq          [outq  ], m4
> +    movq          [outq+8], m5
> +
> +    movq          m5, m0
> +    pmaddwd       m0, [constsq+176]
> +    pmaddwd       m5, [constsq+184]
> +
> +    movq          m7, m1
> +    pmaddwd       m1, [constsq+208]
> +    pmaddwd       m7, [constsq+216]
> +    paddd         m0, m1
> +    paddd         m5, m7
> +
> +    movq          m7, m2
> +    pmaddwd       m2, [constsq+240]
> +    pmaddwd       m7, [constsq+248]
> +    paddd         m0, m2
> +    paddd         m5, m7
> +
> +    movq          m7, m3
> +    pmaddwd       m3, [constsq+272]
> +    pmaddwd       m7, [constsq+280]
> +    paddd         m0, m3
> +    paddd         m5, m7
> +


Has the person writing the SIMD seriously not heard of loops?
I see no reason for this to not work on larger registers if loops were used
here.
This seems trivial to do properly, so if you can't be bothered to fix it
leave it to me or jamrial to do after the core of the encoder has been
merged.


> +    movq          [outq+16], m0
> +    movq          [outq+24], m5
> +
> +    RET
> +
> +
> +;*******************************************************************
> +;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
> +;                              uint32_t scale_factor[2][8],
> +;                              int blocks, int channels, int subbands)
> +;*******************************************************************
> +INIT_MMX mmx
> +cglobal sbc_calc_scalefactors, 5, 9, 3, sb_sample_f, scale_factor,
> blocks, channels, subbands, ch, sb, sa, sf, blk
> +    shl           channelsd, 5
> +    mov           chq, 0
> +.loop_1:
> +    lea           saq, [sb_sample_fq + chq]
> +    lea           sfq, [scale_factorq + chq]
> +
> +    mov           sbd, 0
> +.loop_2:
> +    ; blk = (blocks - 1) * 64;
> +    lea           blkq, [blocksq - 1]
> +    shl           blkd, 6
> +
> +    movq          m0, [scale_mask]
> +.loop_3:
> +    movq          m1, [saq+blkq]
> +    pxor          m2, m2
> +    pcmpgtd       m1, m2
> +    paddd         m1, [saq+blkq]
> +    pcmpgtd       m2, m1
> +    pxor          m1, m2
> +
> +    por           m0, m1
> +
> +    sub           blkd, 64
> +    jns           .loop_3
> +
> +    movd          blkd, m0
> +    psrlq         m0,   32
> +    bsr           blkd, blkd
> +    sub           blkd, 15    ; SCALE_OUT_BITS
> +    mov           [sfq], blkd
> +
> +    movd          blkd, m0
> +    bsr           blkd, blkd
> +    sub           blkd, 15    ; SCALE_OUT_BITS
> +    mov           [sfq+4], blkd
> +
> +    add           saq, 8
> +    add           sfq, 8
> +
> +    add           sbd, 2
> +    cmp           sbd, subbandsd
> +    jl            .loop_2
> +
> +    add           chd, 32
> +    cmp           chd, channelsd
> +    jl            .loop_1
> +
>

This function's hardly doing SIMD and I would like to see a comparison to the
C version before accepting it. I somehow doubt it'll be faster.

+
> +av_cold void ff_sbcdsp_init_x86(SBCDSPContext *s)
> +{
> +    int cpu_flags = av_get_cpu_flags();
> +
> +    if (EXTERNAL_MMX(cpu_flags)) {
> +        s->sbc_analyze_4 = ff_sbc_analyze_4_mmx;
> +        s->sbc_analyze_8 = ff_sbc_analyze_8_mmx;
> +        s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_mmx;
> +    }
> +}
>


MMX? In this day and age?



Anyway, it's mostly not bad, but it will need some work before it's cleaned of
libsbc's NIH.
Carl Eugen Hoyos Nov. 6, 2017, 12:53 p.m. UTC | #3
2017-11-06 5:40 GMT+01:00 Rostislav Pehlivanov <atomnuker@gmail.com>:

>> +    {
>> +        .id        = AV_CODEC_ID_SBC,
>> +        .type      = AVMEDIA_TYPE_AUDIO,
>> +        .name      = "sbc",
>> +        .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband
>> codec)"),
>> +    },
>> +    {
>> +        .id        = AV_CODEC_ID_MSBC,
>> +        .type      = AVMEDIA_TYPE_AUDIO,
>> +        .name      = "msbc",
>> +        .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono
>> SBC)"),
>> +    },
>>
>
> Is there a bitstream difference between the two? I don't think so, so you
> should instead define FF_PROFILE_SBC_WB and use a single codec ID.

Would that have an advantage?

One day, somebody will put it into isom and define two different codec_tags...

Carl Eugen
James Almer Nov. 6, 2017, 1:35 p.m. UTC | #4
On 11/6/2017 9:53 AM, Carl Eugen Hoyos wrote:
> 2017-11-06 5:40 GMT+01:00 Rostislav Pehlivanov <atomnuker@gmail.com>:
> 
>>> +    {
>>> +        .id        = AV_CODEC_ID_SBC,
>>> +        .type      = AVMEDIA_TYPE_AUDIO,
>>> +        .name      = "sbc",
>>> +        .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband
>>> codec)"),
>>> +    },
>>> +    {
>>> +        .id        = AV_CODEC_ID_MSBC,
>>> +        .type      = AVMEDIA_TYPE_AUDIO,
>>> +        .name      = "msbc",
>>> +        .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono
>>> SBC)"),
>>> +    },
>>>
>>
>> Is there a bitstream difference between the two? I don't think so, so you
>> should instead define FF_PROFILE_SBC_WB and use a single codec ID.
> 
> Would that have an advantage?
> 
> One day, somebody will it into isom and define two different codec_tags...

DTS audio has a lot of different codec tags in isom, including DTS-E
which is a completely different bitstream altogether, and we have a
single codec ID for them all.

The de/muxer can handle it just fine, so it's not an issue.

> 
> Carl Eugen
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
James Almer Nov. 6, 2017, 3:53 p.m. UTC | #5
On 11/5/2017 8:35 PM, Aurelien Jacobs wrote:
> This was originally based on libsbc, and was fully integrated into ffmpeg.
> ---
>  doc/general.texi                 |   2 +
>  libavcodec/Makefile              |   4 +
>  libavcodec/allcodecs.c           |   2 +
>  libavcodec/arm/Makefile          |   3 +
>  libavcodec/arm/sbcdsp_armv6.S    | 245 ++++++++++++++
>  libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
>  libavcodec/arm/sbcdsp_neon.S     | 714 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/avcodec.h             |   2 +
>  libavcodec/codec_desc.c          |  12 +
>  libavcodec/sbc.c                 | 316 +++++++++++++++++
>  libavcodec/sbc.h                 | 121 +++++++
>  libavcodec/sbcdec.c              | 469 +++++++++++++++++++++++++
>  libavcodec/sbcdec_data.c         | 127 +++++++
>  libavcodec/sbcdec_data.h         |  44 +++
>  libavcodec/sbcdsp.c              | 569 +++++++++++++++++++++++++++++++
>  libavcodec/sbcdsp.h              |  86 +++++
>  libavcodec/sbcdsp_data.c         | 335 ++++++++++++++++++
>  libavcodec/sbcdsp_data.h         |  57 ++++
>  libavcodec/sbcenc.c              | 461 +++++++++++++++++++++++++
>  libavcodec/x86/Makefile          |   2 +
>  libavcodec/x86/sbcdsp.asm        | 290 ++++++++++++++++
>  libavcodec/x86/sbcdsp_init.c     |  51 +++
>  22 files changed, 4017 insertions(+)
>  create mode 100644 libavcodec/arm/sbcdsp_armv6.S
>  create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
>  create mode 100644 libavcodec/arm/sbcdsp_neon.S
>  create mode 100644 libavcodec/sbc.c
>  create mode 100644 libavcodec/sbc.h
>  create mode 100644 libavcodec/sbcdec.c
>  create mode 100644 libavcodec/sbcdec_data.c
>  create mode 100644 libavcodec/sbcdec_data.h
>  create mode 100644 libavcodec/sbcdsp.c
>  create mode 100644 libavcodec/sbcdsp.h
>  create mode 100644 libavcodec/sbcdsp_data.c
>  create mode 100644 libavcodec/sbcdsp_data.h
>  create mode 100644 libavcodec/sbcenc.c
>  create mode 100644 libavcodec/x86/sbcdsp.asm
>  create mode 100644 libavcodec/x86/sbcdsp_init.c

This needs to be split into at least four patches.
One to add the decoder (plus codec ID, descriptor and such things), one
to add the encoder (and the dsp framework), one to add the x86 assembly
optimizations for the encoder, and one for the arm optimizations.
Aurelien Jacobs Dec. 17, 2017, 9:42 p.m. UTC | #6
On Mon, Nov 06, 2017 at 04:40:56AM +0000, Rostislav Pehlivanov wrote:
> On 5 November 2017 at 23:35, Aurelien Jacobs <aurel@gnuage.org> wrote:
> 
> > This was originally based on libsbc, and was fully integrated into ffmpeg.
> > ---
> >  doc/general.texi                 |   2 +
> >  libavcodec/Makefile              |   4 +
> >  libavcodec/allcodecs.c           |   2 +
> >  libavcodec/arm/Makefile          |   3 +
> >  libavcodec/arm/sbcdsp_armv6.S    | 245 ++++++++++++++
> >  libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
> >  libavcodec/arm/sbcdsp_neon.S     | 714 ++++++++++++++++++++++++++++++
> > +++++++++
> >  libavcodec/avcodec.h             |   2 +
> >  libavcodec/codec_desc.c          |  12 +
> >  libavcodec/sbc.c                 | 316 +++++++++++++++++
> >  libavcodec/sbc.h                 | 121 +++++++
> >  libavcodec/sbcdec.c              | 469 +++++++++++++++++++++++++
> >  libavcodec/sbcdec_data.c         | 127 +++++++
> >  libavcodec/sbcdec_data.h         |  44 +++
> >  libavcodec/sbcdsp.c              | 569 +++++++++++++++++++++++++++++++
> >  libavcodec/sbcdsp.h              |  86 +++++
> >  libavcodec/sbcdsp_data.c         | 335 ++++++++++++++++++
> >  libavcodec/sbcdsp_data.h         |  57 ++++
> >  libavcodec/sbcenc.c              | 461 +++++++++++++++++++++++++
> >  libavcodec/x86/Makefile          |   2 +
> >  libavcodec/x86/sbcdsp.asm        | 290 ++++++++++++++++
> >  libavcodec/x86/sbcdsp_init.c     |  51 +++
> >  22 files changed, 4017 insertions(+)
> >  create mode 100644 libavcodec/arm/sbcdsp_armv6.S
> >  create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
> >  create mode 100644 libavcodec/arm/sbcdsp_neon.S
> >  create mode 100644 libavcodec/sbc.c
> >  create mode 100644 libavcodec/sbc.h
> >  create mode 100644 libavcodec/sbcdec.c
> >  create mode 100644 libavcodec/sbcdec_data.c
> >  create mode 100644 libavcodec/sbcdec_data.h
> >  create mode 100644 libavcodec/sbcdsp.c
> >  create mode 100644 libavcodec/sbcdsp.h
> >  create mode 100644 libavcodec/sbcdsp_data.c
> >  create mode 100644 libavcodec/sbcdsp_data.h
> >  create mode 100644 libavcodec/sbcenc.c
> >  create mode 100644 libavcodec/x86/sbcdsp.asm
> >  create mode 100644 libavcodec/x86/sbcdsp_init.c
> >
> > diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> > index c4134424f0..2d541bf64a 100644
> > --- a/libavcodec/avcodec.h
> > +++ b/libavcodec/avcodec.h
> > @@ -632,6 +632,8 @@ enum AVCodecID {
> >      AV_CODEC_ID_ATRAC3AL,
> >      AV_CODEC_ID_ATRAC3PAL,
> >      AV_CODEC_ID_DOLBY_E,
> > +    AV_CODEC_ID_SBC,
> > +    AV_CODEC_ID_MSBC,
> >
> >
> See below.
> 
> 
> >      /* subtitle codecs */
> >      AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID
> > pointing at the start of subtitle codecs.
> > diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
> > index 92bf1d2681..8d613507e0 100644
> > --- a/libavcodec/codec_desc.c
> > +++ b/libavcodec/codec_desc.c
> > @@ -2859,6 +2859,18 @@ static const AVCodecDescriptor codec_descriptors[]
> > = {
> >          .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
> >          .props     = AV_CODEC_PROP_LOSSY,
> >      },
> > +    {
> > +        .id        = AV_CODEC_ID_SBC,
> > +        .type      = AVMEDIA_TYPE_AUDIO,
> > +        .name      = "sbc",
> > +        .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband
> > codec)"),
> > +    },
> > +    {
> > +        .id        = AV_CODEC_ID_MSBC,
> > +        .type      = AVMEDIA_TYPE_AUDIO,
> > +        .name      = "msbc",
> > +        .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono
> > SBC)"),
> > +    },
> >
> 
> Is there a bitstream difference between the two? I don't think so, so you
> should instead define FF_PROFILE_SBC_WB and use a single codec ID.

SBC supports various sample rates while mSBC is limited to 16 kHz.
I think the only way to declare this properly and to get automatic
conversion to 16 kHz when encoding to mSBC is to have 2 separate
codec IDs.
So I kept the 2 separate codec IDs.

> > diff --git a/libavcodec/sbc.c b/libavcodec/sbc.c
> > new file mode 100644
> > index 0000000000..99d02cc56a
> > --- /dev/null
> > +++ b/libavcodec/sbc.c
> > @@ -0,0 +1,316 @@
> > +/*
> > + * Bluetooth low-complexity, subband codec (SBC)
> > + *
> > + * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
> > + * Copyright (C) 2012-2013  Intel Corporation
> > + * Copyright (C) 2008-2010  Nokia Corporation
> > + * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
> > + * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
> > + * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * SBC common functions for the encoder and decoder
> > + */
> > +
> > +#include "avcodec.h"
> > +#include "sbc.h"
> > +
> > +/*
> > + * Calculates the CRC-8 of the first len bits in data
> > + */
> > +static const uint8_t crc_table[256] = {
> > +    0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53,
> > +    0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB,
> > +    0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E,
> > +    0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76,
> > +    0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4,
> > +    0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C,
> > +    0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19,
> > +    0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1,
> > +    0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40,
> > +    0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8,
> > +    0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D,
> > +    0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65,
> > +    0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7,
> > +    0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F,
> > +    0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A,
> > +    0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2,
> > +    0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75,
> > +    0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D,
> > +    0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8,
> > +    0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50,
> > +    0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2,
> > +    0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A,
> > +    0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F,
> > +    0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7,
> > +    0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66,
> > +    0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E,
> > +    0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB,
> > +    0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43,
> > +    0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1,
> > +    0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09,
> > +    0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C,
> > +    0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4
> > +};
> > +
> > +uint8_t ff_sbc_crc8(const uint8_t *data, size_t len)
> > +{
> > +    uint8_t crc = 0x0f;
> > +    size_t i;
> > +    uint8_t octet;
> > +
> > +    for (i = 0; i < len / 8; i++)
> > +        crc = crc_table[crc ^ data[i]];
> > +
> > +    octet = data[i];
> > +    for (i = 0; i < len % 8; i++) {
> > +        char bit = ((octet ^ crc) & 0x80) >> 7;
> > +
> > +        crc = ((crc & 0x7f) << 1) ^ (bit ? 0x1d : 0);
> > +
> > +        octet = octet << 1;
> > +    }
> > +
> > +    return crc;
> > +}
> >
> 
> 
> We have CRC functions already, look in libavutil/crc.h

I know this and I tried to use them but I couldn't get them to behave
the same as this ff_sbc_crc8.

> > +                        if (subbands == 4)
> > +                            loudness = frame->scale_factor[ch][sb] -
> > sbc_offset4[sf][sb];
> > +                        else
> > +                            loudness = frame->scale_factor[ch][sb] -
> > sbc_offset8[sf][sb];
> > +                        if (loudness > 0)
> > +                            bitneed[ch][sb] = loudness / 2;
> > +                        else
> > +                            bitneed[ch][sb] = loudness;
> >
> 
> 
> bitneed[ch][sb] = loudness >> (loudness > 0);

OK.

> > +
> > +static int sbc_decode_frame(AVCodecContext *avctx,
> > +                            void *data, int *got_frame_ptr,
> > +                            AVPacket *avpkt)
> > +{
> > +    SBCDecContext *sbc = avctx->priv_data;
> > +    int i, ch, samples, ret;
> > +    AVFrame *frame = data;
> > +    int16_t *ptr;
> > +
> > +    if (!sbc)
> > +        return AVERROR(EIO);
> > +
> > +    sbc->frame.length = sbc->unpack_frame(avpkt->data, &sbc->frame,
> > avpkt->size);
> > +    if (sbc->frame.length <= 0)
> > +        return sbc->frame.length;
> > +
> > +    samples = sbc_synthesize_audio(&sbc->dsp, &sbc->frame);
> > +
> > +    frame->nb_samples = samples;
> > +    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> > +        return ret;
> > +    ptr = (int16_t *)frame->data[0];
> > +
> > +    for (i = 0; i < samples; i++)
> > +        for (ch = 0; ch < sbc->frame.channels; ch++)
> > +            *ptr++ = sbc->frame.pcm_sample[ch][i];
> > +
> >
> 
> Once again, use planar sample formats

Done.

> > +    *got_frame_ptr = 1;
> > +
> > +    return sbc->frame.length;
> > +}
> > +
> > +#if CONFIG_SBC_DECODER
> > +AVCodec ff_sbc_decoder = {
> > +    .name                  = "sbc",
> > +    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity
> > subband codec)"),
> > +    .type                  = AVMEDIA_TYPE_AUDIO,
> > +    .id                    = AV_CODEC_ID_SBC,
> > +    .priv_data_size        = sizeof(SBCDecContext),
> > +    .init                  = sbc_decode_init,
> > +    .decode                = sbc_decode_frame,
> > +    .capabilities          = AV_CODEC_CAP_DR1,
> > +    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
> > +                                                  AV_CH_LAYOUT_STEREO, 0},
> > +    .sample_fmts           = (const enum AVSampleFormat[]) {
> > AV_SAMPLE_FMT_S16,
> >
> 
> 
> AV_SAMPLE_FMT_S16P

Done.

> > +
> >  AV_SAMPLE_FMT_NONE },
> > +    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000,
> > 0 },
> > +};
> > +#endif
> > +
> > +#if CONFIG_MSBC_DECODER
> > +AVCodec ff_msbc_decoder = {
> > +    .name                  = "msbc",
> > +    .long_name             = NULL_IF_CONFIG_SMALL("mSBC (wideband speech
> > mono SBC)"),
> > +    .type                  = AVMEDIA_TYPE_AUDIO,
> > +    .id                    = AV_CODEC_ID_MSBC,
> > +    .priv_data_size        = sizeof(SBCDecContext),
> > +    .init                  = msbc_decode_init,
> > +    .decode                = sbc_decode_frame,
> > +    .capabilities          = AV_CODEC_CAP_DR1,
> > +    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0},
> > +    .sample_fmts           = (const enum AVSampleFormat[]) {
> > AV_SAMPLE_FMT_S16,
> >
> 
> 
> AV_SAMPLE_FMT_S16P

Done.

> > +
> > +/*
> > + * A reference C code of analysis filter with SIMD-friendly tables
> > + * reordering and code layout. This code can be used to develop platform
> > + * specific SIMD optimizations. Also it may be used as some kind of test
> > + * for compiler autovectorization capabilities (who knows, if the compiler
> > + * is very good at this stuff, hand optimized assembly may be not strictly
> > + * needed for some platform).
> > + *
> > + * Note: It is also possible to make a simple variant of analysis filter,
> > + * which needs only a single constants table without taking care about
> > + * even/odd cases. This simple variant of filter can be implemented
> > without
> > + * input data permutation. The only thing that would be lost is the
> > + * possibility to use pairwise SIMD multiplications. But for some simple
> > + * CPU cores without SIMD extensions it can be useful. If anybody is
> > + * interested in implementing such variant of a filter, sourcecode from
> > + * bluez versions 4.26/4.27 can be used as a reference and the history of
> > + * the changes in git repository done around that time may be worth
> > checking.
> > + */
> > +
> > +static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
> > +                               const int16_t *consts)
> > +{
> > +    int32_t t1[4];
> > +    int16_t t2[4];
> > +    int hop = 0;
> > +
> > +    /* rounding coefficient */
> > +    t1[0] = t1[1] = t1[2] = t1[3] =
> > +        (int32_t) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
> > +
> > +    /* low pass polyphase filter */
> > +    for (hop = 0; hop < 40; hop += 8) {
> > +        t1[0] += (int32_t) in[hop] * consts[hop];
> > +        t1[0] += (int32_t) in[hop + 1] * consts[hop + 1];
> > +        t1[1] += (int32_t) in[hop + 2] * consts[hop + 2];
> > +        t1[1] += (int32_t) in[hop + 3] * consts[hop + 3];
> > +        t1[2] += (int32_t) in[hop + 4] * consts[hop + 4];
> > +        t1[2] += (int32_t) in[hop + 5] * consts[hop + 5];
> > +        t1[3] += (int32_t) in[hop + 6] * consts[hop + 6];
> > +        t1[3] += (int32_t) in[hop + 7] * consts[hop + 7];
> > +    }
> > +
> > +    /* scaling */
> > +    t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE;
> > +    t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE;
> > +    t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE;
> > +    t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE;
> > +
> > +    /* do the cos transform */
> > +    t1[0]  = (int32_t) t2[0] * consts[40 + 0];
> > +    t1[0] += (int32_t) t2[1] * consts[40 + 1];
> > +    t1[1]  = (int32_t) t2[0] * consts[40 + 2];
> > +    t1[1] += (int32_t) t2[1] * consts[40 + 3];
> > +    t1[2]  = (int32_t) t2[0] * consts[40 + 4];
> > +    t1[2] += (int32_t) t2[1] * consts[40 + 5];
> > +    t1[3]  = (int32_t) t2[0] * consts[40 + 6];
> > +    t1[3] += (int32_t) t2[1] * consts[40 + 7];
> > +
> > +    t1[0] += (int32_t) t2[2] * consts[40 + 8];
> > +    t1[0] += (int32_t) t2[3] * consts[40 + 9];
> > +    t1[1] += (int32_t) t2[2] * consts[40 + 10];
> > +    t1[1] += (int32_t) t2[3] * consts[40 + 11];
> > +    t1[2] += (int32_t) t2[2] * consts[40 + 12];
> > +    t1[2] += (int32_t) t2[3] * consts[40 + 13];
> > +    t1[3] += (int32_t) t2[2] * consts[40 + 14];
> > +    t1[3] += (int32_t) t2[3] * consts[40 + 15];
> > +
> > +    out[0] = t1[0] >>
> > +        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
> > +    out[1] = t1[1] >>
> > +        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
> > +    out[2] = t1[2] >>
> > +        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
> > +    out[3] = t1[3] >>
> > +        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
> > +}
> > +
> > +static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
> > +                               const int16_t *consts)
> > +{
> > +    int32_t t1[8];
> > +    int16_t t2[8];
> > +    int i, hop;
> > +
> > +    /* rounding coefficient */
> > +    t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] =
> > +        (int32_t) 1 << (SBC_PROTO_FIXED8_SCALE-1);
> > +
> > +    /* low pass polyphase filter */
> > +    for (hop = 0; hop < 80; hop += 16) {
> > +        t1[0] += (int32_t) in[hop] * consts[hop];
> > +        t1[0] += (int32_t) in[hop + 1] * consts[hop + 1];
> > +        t1[1] += (int32_t) in[hop + 2] * consts[hop + 2];
> > +        t1[1] += (int32_t) in[hop + 3] * consts[hop + 3];
> > +        t1[2] += (int32_t) in[hop + 4] * consts[hop + 4];
> > +        t1[2] += (int32_t) in[hop + 5] * consts[hop + 5];
> > +        t1[3] += (int32_t) in[hop + 6] * consts[hop + 6];
> > +        t1[3] += (int32_t) in[hop + 7] * consts[hop + 7];
> > +        t1[4] += (int32_t) in[hop + 8] * consts[hop + 8];
> > +        t1[4] += (int32_t) in[hop + 9] * consts[hop + 9];
> > +        t1[5] += (int32_t) in[hop + 10] * consts[hop + 10];
> > +        t1[5] += (int32_t) in[hop + 11] * consts[hop + 11];
> > +        t1[6] += (int32_t) in[hop + 12] * consts[hop + 12];
> > +        t1[6] += (int32_t) in[hop + 13] * consts[hop + 13];
> > +        t1[7] += (int32_t) in[hop + 14] * consts[hop + 14];
> > +        t1[7] += (int32_t) in[hop + 15] * consts[hop + 15];
> > +    }
> > +
> > +    /* scaling */
> > +    t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE;
> > +    t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE;
> > +    t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE;
> > +    t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE;
> > +    t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE;
> > +    t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE;
> > +    t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE;
> > +    t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE;
> > +
> > +
> > +    /* do the cos transform */
> > +    t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0;
> > +
> > +    for (i = 0; i < 4; i++) {
> > +        t1[0] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 0];
> > +        t1[0] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 1];
> > +        t1[1] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 2];
> > +        t1[1] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 3];
> > +        t1[2] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 4];
> > +        t1[2] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 5];
> > +        t1[3] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 6];
> > +        t1[3] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 7];
> > +        t1[4] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 8];
> > +        t1[4] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 9];
> > +        t1[5] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 10];
> > +        t1[5] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 11];
> > +        t1[6] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 12];
> > +        t1[6] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 13];
> > +        t1[7] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 14];
> > +        t1[7] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 15];
> > +    }
> > +
> > +    for (i = 0; i < 8; i++)
> > +        out[i] = t1[i] >>
> > +            (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
> > +}
> > +
> >
> 
> 
> What does it do here? A PQF into an FFT?
> I might investigate using lavc's fixed point mdct for this maybe, I hate
> custom fixed-point analysis transforms.

Sure, have a go. It would be great if it could be used.

> > +static inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
> > +                                          int16_t *x, int32_t *out, int
> > out_stride)
> > +{
> > +    /* Analyze blocks */
> > +    s->sbc_analyze_4(x + 12, out, ff_sbcdsp_analysis_consts_
> > fixed4_simd_odd);
> > +    out += out_stride;
> > +    s->sbc_analyze_4(x + 8, out, ff_sbcdsp_analysis_consts_
> > fixed4_simd_even);
> > +    out += out_stride;
> > +    s->sbc_analyze_4(x + 4, out, ff_sbcdsp_analysis_consts_
> > fixed4_simd_odd);
> > +    out += out_stride;
> > +    s->sbc_analyze_4(x + 0, out, ff_sbcdsp_analysis_consts_
> > fixed4_simd_even);
> > +
> > +    emms_c();
> > +}
> > +
> > +static inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
> > +                                          int16_t *x, int32_t *out, int
> > out_stride)
> > +{
> > +    /* Analyze blocks */
> > +    s->sbc_analyze_8(x + 24, out, ff_sbcdsp_analysis_consts_
> > fixed8_simd_odd);
> > +    out += out_stride;
> > +    s->sbc_analyze_8(x + 16, out, ff_sbcdsp_analysis_consts_
> > fixed8_simd_even);
> > +    out += out_stride;
> > +    s->sbc_analyze_8(x + 8, out, ff_sbcdsp_analysis_consts_
> > fixed8_simd_odd);
> > +    out += out_stride;
> > +    s->sbc_analyze_8(x + 0, out, ff_sbcdsp_analysis_consts_
> > fixed8_simd_even);
> > +
> > +    emms_c();
> > +}
> > +
> > +static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
> > +                                               int16_t *x, int32_t *out,
> > +                                               int out_stride);
> > +
> > +static inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
> > +                                              int16_t *x, int32_t *out,
> > +                                              int out_stride)
> > +{
> > +    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
> > +    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_even;
> > +
> > +    emms_c();
> > +}
> > +
> > +static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
> > +                                               int16_t *x, int32_t *out,
> > +                                               int out_stride)
> > +{
> > +    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
> > +    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
> > +
> > +    emms_c();
> > +}
> > +
> > +#define PCM(i)  AV_RN16(pcm + 2*(i))
> >
> 
> Don't use a define, just substitute it directly.

OK.

> > +
> > +/*
> > + * Internal helper functions for input data processing. In order to get
> > + * optimal performance, it is important to have "nsamples" and "nchannels"
> > + * arguments used with this inline function as compile time constants.
> > + */
> > +
> > +static av_always_inline int sbc_encoder_process_input_s4_internal(
> > +    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
> > +    int nsamples, int nchannels)
> > +{
> > +    /* handle X buffer wraparound */
> > +    if (position < nsamples) {
> > +        if (nchannels > 0)
> > +            memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position],
> > +                            36 * sizeof(int16_t));
> > +        if (nchannels > 1)
> > +            memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position],
> > +                            36 * sizeof(int16_t));
> > +        position = SBC_X_BUFFER_SIZE - 40;
> > +    }
> > +
> > +    /* copy/permutate audio samples */
> > +    while ((nsamples -= 8) >= 0) {
> > +        position -= 8;
> > +        if (nchannels > 0) {
> > +            int16_t *x = &X[0][position];
> > +            x[0]  = PCM(0 + 7 * nchannels);
> > +            x[1]  = PCM(0 + 3 * nchannels);
> > +            x[2]  = PCM(0 + 6 * nchannels);
> > +            x[3]  = PCM(0 + 4 * nchannels);
> > +            x[4]  = PCM(0 + 0 * nchannels);
> > +            x[5]  = PCM(0 + 2 * nchannels);
> > +            x[6]  = PCM(0 + 1 * nchannels);
> > +            x[7]  = PCM(0 + 5 * nchannels);
> > +        }
> > +        if (nchannels > 1) {
> > +            int16_t *x = &X[1][position];
> > +            x[0]  = PCM(1 + 7 * nchannels);
> > +            x[1]  = PCM(1 + 3 * nchannels);
> > +            x[2]  = PCM(1 + 6 * nchannels);
> > +            x[3]  = PCM(1 + 4 * nchannels);
> > +            x[4]  = PCM(1 + 0 * nchannels);
> > +            x[5]  = PCM(1 + 2 * nchannels);
> > +            x[6]  = PCM(1 + 1 * nchannels);
> > +            x[7]  = PCM(1 + 5 * nchannels);
> > +        }
> > +        pcm += 16 * nchannels;
> > +    }
> > +
> > +    return position;
> > +}
> > +
> > +static av_always_inline int sbc_encoder_process_input_s8_internal(
> > +    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
> > +    int nsamples, int nchannels)
> > +{
> > +    /* handle X buffer wraparound */
> > +    if (position < nsamples) {
> > +        if (nchannels > 0)
> > +            memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position],
> > +                            72 * sizeof(int16_t));
> > +        if (nchannels > 1)
> > +            memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position],
> > +                            72 * sizeof(int16_t));
> > +        position = SBC_X_BUFFER_SIZE - 72;
> > +    }
> > +
> > +    if (position % 16 == 8) {
> > +        position -= 8;
> > +        nsamples -= 8;
> > +        if (nchannels > 0) {
> > +            int16_t *x = &X[0][position];
> > +            x[0]  = PCM(0 + (15-8) * nchannels);
> > +            x[2]  = PCM(0 + (14-8) * nchannels);
> > +            x[3]  = PCM(0 + (8-8) * nchannels);
> > +            x[4]  = PCM(0 + (13-8) * nchannels);
> > +            x[5]  = PCM(0 + (9-8) * nchannels);
> > +            x[6]  = PCM(0 + (12-8) * nchannels);
> > +            x[7]  = PCM(0 + (10-8) * nchannels);
> > +            x[8]  = PCM(0 + (11-8) * nchannels);
> > +        }
> > +        if (nchannels > 1) {
> > +            int16_t *x = &X[1][position];
> > +            x[0]  = PCM(1 + (15-8) * nchannels);
> > +            x[2]  = PCM(1 + (14-8) * nchannels);
> > +            x[3]  = PCM(1 + (8-8) * nchannels);
> > +            x[4]  = PCM(1 + (13-8) * nchannels);
> > +            x[5]  = PCM(1 + (9-8) * nchannels);
> > +            x[6]  = PCM(1 + (12-8) * nchannels);
> > +            x[7]  = PCM(1 + (10-8) * nchannels);
> > +            x[8]  = PCM(1 + (11-8) * nchannels);
> > +        }
> > +
> > +        pcm += 16 * nchannels;
> > +    }
> > +
> > +    /* copy/permutate audio samples */
> > +    while (nsamples >= 16) {
> > +        position -= 16;
> > +        if (nchannels > 0) {
> > +            int16_t *x = &X[0][position];
> > +            x[0]  = PCM(0 + 15 * nchannels);
> > +            x[1]  = PCM(0 + 7 * nchannels);
> > +            x[2]  = PCM(0 + 14 * nchannels);
> > +            x[3]  = PCM(0 + 8 * nchannels);
> > +            x[4]  = PCM(0 + 13 * nchannels);
> > +            x[5]  = PCM(0 + 9 * nchannels);
> > +            x[6]  = PCM(0 + 12 * nchannels);
> > +            x[7]  = PCM(0 + 10 * nchannels);
> > +            x[8]  = PCM(0 + 11 * nchannels);
> > +            x[9]  = PCM(0 + 3 * nchannels);
> > +            x[10] = PCM(0 + 6 * nchannels);
> > +            x[11] = PCM(0 + 0 * nchannels);
> > +            x[12] = PCM(0 + 5 * nchannels);
> > +            x[13] = PCM(0 + 1 * nchannels);
> > +            x[14] = PCM(0 + 4 * nchannels);
> > +            x[15] = PCM(0 + 2 * nchannels);
> > +        }
> > +        if (nchannels > 1) {
> > +            int16_t *x = &X[1][position];
> > +            x[0]  = PCM(1 + 15 * nchannels);
> > +            x[1]  = PCM(1 + 7 * nchannels);
> > +            x[2]  = PCM(1 + 14 * nchannels);
> > +            x[3]  = PCM(1 + 8 * nchannels);
> > +            x[4]  = PCM(1 + 13 * nchannels);
> > +            x[5]  = PCM(1 + 9 * nchannels);
> > +            x[6]  = PCM(1 + 12 * nchannels);
> > +            x[7]  = PCM(1 + 10 * nchannels);
> > +            x[8]  = PCM(1 + 11 * nchannels);
> > +            x[9]  = PCM(1 + 3 * nchannels);
> > +            x[10] = PCM(1 + 6 * nchannels);
> > +            x[11] = PCM(1 + 0 * nchannels);
> > +            x[12] = PCM(1 + 5 * nchannels);
> > +            x[13] = PCM(1 + 1 * nchannels);
> > +            x[14] = PCM(1 + 4 * nchannels);
> > +            x[15] = PCM(1 + 2 * nchannels);
> > +        }
> > +        pcm += 32 * nchannels;
> > +        nsamples -= 16;
> > +    }
> > +
> > +    if (nsamples == 8) {
> > +        position -= 8;
> > +        if (nchannels > 0) {
> > +            int16_t *x = &X[0][position];
> > +            x[-7] = PCM(0 + 7 * nchannels);
> > +            x[1]  = PCM(0 + 3 * nchannels);
> > +            x[2]  = PCM(0 + 6 * nchannels);
> > +            x[3]  = PCM(0 + 0 * nchannels);
> > +            x[4]  = PCM(0 + 5 * nchannels);
> > +            x[5]  = PCM(0 + 1 * nchannels);
> > +            x[6]  = PCM(0 + 4 * nchannels);
> > +            x[7]  = PCM(0 + 2 * nchannels);
> > +        }
> > +        if (nchannels > 1) {
> > +            int16_t *x = &X[1][position];
> > +            x[-7] = PCM(1 + 7 * nchannels);
> > +            x[1]  = PCM(1 + 3 * nchannels);
> > +            x[2]  = PCM(1 + 6 * nchannels);
> > +            x[3]  = PCM(1 + 0 * nchannels);
> > +            x[4]  = PCM(1 + 5 * nchannels);
> > +            x[5]  = PCM(1 + 1 * nchannels);
> > +            x[6]  = PCM(1 + 4 * nchannels);
> > +            x[7]  = PCM(1 + 2 * nchannels);
> > +        }
> > +    }
> > +
> > +    return position;
> > +}
> > +
> > +/*
> > + * Input data processing functions. The data is endian converted if
> > needed,
> > + * channels are deintrleaved and audio samples are reordered for use in
> > + * SIMD-friendly analysis filter function. The results are put into "X"
> > + * array, getting appended to the previous data (or it is better to say
> > + * prepended, as the buffer is filled from top to bottom). Old data is
> > + * discarded when neededed, but availability of (10 * nrof_subbands)
> > + * contiguous samples is always guaranteed for the input to the analysis
> > + * filter. This is achieved by copying a sufficient part of old data
> > + * to the top of the buffer on buffer wraparound.
> > + */
> > +
> > +static int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
> > +                                    int16_t X[2][SBC_X_BUFFER_SIZE],
> > +                                    int nsamples, int nchannels)
> > +{
> > +    if (nchannels > 1)
> > +        return sbc_encoder_process_input_s4_internal(
> > +            position, pcm, X, nsamples, 2);
> > +    else
> > +        return sbc_encoder_process_input_s4_internal(
> > +            position, pcm, X, nsamples, 1);
> > +}
> >
> 
> That's just silly, do
> return sbc_encoder_process_input_s4_internal(position, pcm, X, nsamples, 1
> + (nchannels > 1));

The point was to get the sbc_encoder_process_input_s4_internal inlined
in 2 different ways depending on its last parameter (constant), for
compiler optimization purpose.

> Or better yet remove the wrapper function.

That's what I did, without significant performance difference.
I guess compilers got better at optimizing this kind of code since
it was first written.

> > +
> > +static int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
> > +                                    int16_t X[2][SBC_X_BUFFER_SIZE],
> > +                                    int nsamples, int nchannels)
> > +{
> > +    if (nchannels > 1)
> > +        return sbc_encoder_process_input_s8_internal(
> > +            position, pcm, X, nsamples, 2);
> > +    else
> > +        return sbc_encoder_process_input_s8_internal(
> > +            position, pcm, X, nsamples, 1);
> > +}
> > +
> >
> 
> Same here.

Wrapper removed.

> >
> > +++ b/libavcodec/sbcdsp_data.c
> > @@ -0,0 +1,335 @@
> > +/*
> > + * Bluetooth low-complexity, subband codec (SBC)
> > + *
> > + * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
> > + * Copyright (C) 2008-2010  Nokia Corporation
> > + * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
> > + * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
> > + * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * miscellaneous SBC tables
> > + */
> > +
> > +#include "sbcdsp_data.h"
> > +
> > +#define F_PROTO4(x) (int32_t) ((x * 2) * \
> > +    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
> > +#define F_COS4(x) (int32_t) ((x) * \
> > +    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
> > +#define F_PROTO8(x) (int32_t) ((x * 2) * \
> > +    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
> > +#define F_COS8(x) (int32_t) ((x) * \
> > +    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5)
> > +
> >
> 
> 
> We require 8 bit bytes, so s/CHAR_BIT/8/g throughout.

OK.

> +++ b/libavcodec/sbcdsp_data.h
> > @@ -0,0 +1,57 @@
> > +/*
> > + * Bluetooth low-complexity, subband codec (SBC)
> > + *
> > + * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
> > + * Copyright (C) 2008-2010  Nokia Corporation
> > + * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
> > + * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
> > + * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * miscellaneous SBC tables
> > + */
> > +
> > +#ifndef AVCODEC_SBCDSP_DATA_H
> > +#define AVCODEC_SBCDSP_DATA_H
> > +
> > +#include "sbc.h"
> > +
> > +#define SBC_PROTO_FIXED4_SCALE      ((sizeof(int16_t) * CHAR_BIT - 1) + 1)
> > +#define SBC_COS_TABLE_FIXED4_SCALE  ((sizeof(int16_t) * CHAR_BIT - 1)    )
> > +#define SBC_PROTO_FIXED8_SCALE      ((sizeof(int16_t) * CHAR_BIT - 1) + 1)
> > +#define SBC_COS_TABLE_FIXED8_SCALE  ((sizeof(int16_t) * CHAR_BIT - 1)    )
> > +
> >
> 
> Same

OK.

> >
> > +
> > +    /* align the last crc byte */
> > +    if (crc_pos % 8)
> > +        crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8);
> >
> >
> put_bits_align?

I gave it a try but it made the code more complex and less readable,
so I kept the code as it was.

> > +    avpkt->data[3] = ff_sbc_crc8(crc_header, crc_pos);
> > +
> > +    ff_sbc_calculate_bits(frame, bits);
> > +
> > +    for (ch = 0; ch < frame_channels; ch++) {
> > +        for (sb = 0; sb < frame_subbands; sb++) {
> > +            levels[ch][sb] = ((1 << bits[ch][sb]) - 1) <<
> > +                (32 - (frame->scale_factor[ch][sb] +
> > +                    SCALE_OUT_BITS + 2));
> > +            sb_sample_delta[ch][sb] = (uint32_t) 1 <<
> > +                (frame->scale_factor[ch][sb] +
> > +                    SCALE_OUT_BITS + 1);
> > +        }
> > +    }
> > +
> > +    for (blk = 0; blk < frame->blocks; blk++) {
> > +        for (ch = 0; ch < frame_channels; ch++) {
> > +            for (sb = 0; sb < frame_subbands; sb++) {
> > +
> > +                if (bits[ch][sb] == 0)
> > +                    continue;
> > +
> > +                audio_sample = ((uint64_t) levels[ch][sb] *
> > +                    (sb_sample_delta[ch][sb] +
> > +                    frame->sb_sample_f[blk][ch][sb])) >> 32;
> > +
> > +                put_bits(&pb, bits[ch][sb], audio_sample);
> > +            }
> > +        }
> > +    }
> > +
> > +    flush_put_bits(&pb);
> > +
> > +    return (put_bits_count(&pb) + 7) / 8;
> > +}
> > +
> > +static size_t sbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame,
> > int joint)
> > +{
> > +    int frame_subbands = 4;
> > +
> > +    avpkt->data[0] = SBC_SYNCWORD;
> > +
> > +    avpkt->data[1] = (frame->frequency & 0x03) << 6;
> > +    avpkt->data[1] |= (frame->block_mode & 0x03) << 4;
> > +    avpkt->data[1] |= (frame->mode & 0x03) << 2;
> > +    avpkt->data[1] |= (frame->allocation & 0x01) << 1;
> > +
> >
> 
> Use put_bits?

For just writing flags in one byte? This seems overkill!

> > +
> > +    if (frame->subbands == 4) {
> > +        if (frame->channels == 1)
> > +            return sbc_pack_frame_internal(avpkt, frame, 4, 1, joint);
> >
> 
> return sbc_pack_frame_internal(avpkt, frame, 4, 1 + (frame->channels == 1),
> joint);
> 
> 
> > +            return sbc_pack_frame_internal(avpkt, frame, 8, 1, joint);
> > +        else
> > +            return sbc_pack_frame_internal(avpkt, frame, 8, 2, joint);
> >
> 
> return sbc_pack_frame_internal(avpkt, frame, 8, 1 + (frame->channels == 1),
> joint);

OK, improved with a single call to sbc_pack_frame_internal.

> > +    }
> > +}
> > +
> > +static size_t msbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame,
> > int joint)
> > +{
> > +    avpkt->data[0] = MSBC_SYNCWORD;
> > +    avpkt->data[1] = 0;
> > +    avpkt->data[2] = 0;
> > +
> > +    return sbc_pack_frame_internal(avpkt, frame, 8, 1, joint);
> > +}
> > +
> > +static void sbc_encoder_init(bool msbc, SBCDSPContext *s,
> > +                             const struct sbc_frame *frame)
> > +{
> > +    memset(&s->X, 0, sizeof(s->X));
> > +    s->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7;
> > +    if (msbc)
> > +        s->increment = 1;
> > +    else
> > +        s->increment = 4;
> 
> +
> >
> 
> Save a line, use a ternary.

OK.

> > +
> > +    sbc->pack_frame = sbc_pack_frame;
> > +
> > +    sbc->frequency = SBC_FREQ_44100;
> >
> 
> 
> Yet in the AVCodec structure the encoder specifies it supports 16khz, 32khz
> and 48khz.

Indeed, forcing to 44100 was a leftover. SBC actually supports 16, 32,
44.1 and 48 kHz.

> You should remove the SBC_FREQ macros and use avctx->sample_rate directly.
> Also remove any unsupported samplerates.

Those macros correspond to the actual values that have to be written in
the SBC bitstream.

> > +    sbc->mode = SBC_MODE_STEREO;
> > +    if (sbc->joint_stereo)
> > +        sbc->mode = SBC_MODE_JOINT_STEREO;
> > +    else if (sbc->dual_channel)
> > +        sbc->mode = SBC_MODE_DUAL_CHANNEL;
> > +    sbc->subbands >>= 3;
> > +    sbc->blocks = (sbc->blocks >> 2) - 1;
> > +
> > +    if (!avctx->frame_size)
> > +        avctx->frame_size = 4*(sbc->subbands + 1) * 4*(sbc->blocks + 1);
> > +
> > +    for (int i = 0; avctx->codec->supported_samplerates[i]; i++)
> > +        if (avctx->sample_rate == avctx->codec->supported_samplerates[i])
> > +            sbc->frequency = i;
> > +
> > +    if (avctx->channels == 1)
> > +        sbc->mode = SBC_MODE_MONO;
> > +
> > +    return 0;
> > +}
> > +
> > +static int msbc_encode_init(AVCodecContext *avctx)
> > +{
> > +    SBCEncContext *sbc = avctx->priv_data;
> > +
> > +    sbc->msbc = true;
> > +    sbc->pack_frame = msbc_pack_frame;
> > +
> > +    sbc->frequency = SBC_FREQ_16000;
> > +    sbc->blocks = MSBC_BLOCKS;
> > +    sbc->subbands = SBC_SB_8;
> > +    sbc->mode = SBC_MODE_MONO;
> > +    sbc->allocation = SBC_AM_LOUDNESS;
> > +    sbc->bitpool = 26;
> > +
> > +    if (!avctx->frame_size)
> > +        avctx->frame_size = 8 * MSBC_BLOCKS;
> > +
> >
> 
> Does the encoder actually accept arbitrary custom frame sizes?

Indeed no. Fixed.

> >
> > +
> > +#if CONFIG_SBC_ENCODER
> > +AVCodec ff_sbc_encoder = {
> > +    .name                  = "sbc",
> > +    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity
> > subband codec)"),
> > +    .type                  = AVMEDIA_TYPE_AUDIO,
> > +    .id                    = AV_CODEC_ID_SBC,
> > +    .priv_data_size        = sizeof(SBCEncContext),
> > +    .init                  = sbc_encode_init,
> > +    .encode2               = sbc_encode_frame,
> > +    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
> > +                                                  AV_CH_LAYOUT_STEREO, 0},
> > +    .sample_fmts           = (const enum AVSampleFormat[]) {
> > AV_SAMPLE_FMT_S16,
> > +
> >  AV_SAMPLE_FMT_NONE },
> >
> 
> Planar?

Not quite. The whole MMX / ARM code is written for interleaved input and
I don't plan to rewrite it.

> > +    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000,
> > 0 },
> >
> 
> Remove the samplerates the encoder doesn't support.

The encoder actually supports all those samplerates.

> Also add the internal codec cap about threadsafe init since the encoder
> doesn't init any global tables to both this and the aptX encoders.

Done.

> > +
> > +;*******************************************************************
> > +;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t
> > *consts);
> > +;*******************************************************************
> > +INIT_MMX mmx
> > +cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
> > +    movq          m0, [inq]
> > +    movq          m1, [inq+8]
> > +    pmaddwd       m0, [constsq]
> > +    pmaddwd       m1, [constsq+8]
> > +    paddd         m0, [scale_mask]
> > +    paddd         m1, [scale_mask]
> > +
> > +    movq          m2, [inq+16]
> > +    movq          m3, [inq+24]
> > +    pmaddwd       m2, [constsq+16]
> > +    pmaddwd       m3, [constsq+24]
> > +    paddd         m0, m2
> > +    paddd         m1, m3
> > +
> > +    movq          m2, [inq+32]
> > +    movq          m3, [inq+40]
> > +    pmaddwd       m2, [constsq+32]
> > +    pmaddwd       m3, [constsq+40]
> > +    paddd         m0, m2
> > +    paddd         m1, m3
> > +
> > +    movq          m2, [inq+48]
> > +    movq          m3, [inq+56]
> > +    pmaddwd       m2, [constsq+48]
> > +    pmaddwd       m3, [constsq+56]
> > +    paddd         m0, m2
> > +    paddd         m1, m3
> > +
> > +    movq          m2, [inq+64]
> > +    movq          m3, [inq+72]
> > +    pmaddwd       m2, [constsq+64]
> > +    pmaddwd       m3, [constsq+72]
> > +    paddd         m0, m2
> > +    paddd         m1, m3
> > +
> >
> 
> Loops?
> 
> 
> > +    psrad         m0, 16    ; SBC_PROTO_FIXED4_SCALE
> > +    psrad         m1, 16    ; SBC_PROTO_FIXED4_SCALE
> > +    packssdw      m0, m0
> > +    packssdw      m1, m1
> > +
> > +    movq          m2, m0
> > +    pmaddwd       m0, [constsq+80]
> > +    pmaddwd       m2, [constsq+88]
> > +
> > +    movq          m3, m1
> > +    pmaddwd       m1, [constsq+96]
> > +    pmaddwd       m3, [constsq+104]
> > +    paddd         m0, m1
> > +    paddd         m2, m3
> > +
> > +    movq          [outq  ], m0
> > +    movq          [outq+8], m2
> > +
> > +    RET
> > +
> > +
> > +
> > +;*******************************************************************
> > +;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t
> > *consts);
> > +;*******************************************************************
> > +INIT_MMX mmx
> > +cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
> > +    movq          m0, [inq]
> > +    movq          m1, [inq+8]
> > +    movq          m2, [inq+16]
> > +    movq          m3, [inq+24]
> > +    pmaddwd       m0, [constsq]
> > +    pmaddwd       m1, [constsq+8]
> > +    pmaddwd       m2, [constsq+16]
> > +    pmaddwd       m3, [constsq+24]
> > +    paddd         m0, [scale_mask]
> > +    paddd         m1, [scale_mask]
> > +    paddd         m2, [scale_mask]
> > +    paddd         m3, [scale_mask]
> > +
> > +    movq          m4, [inq+32]
> > +    movq          m5, [inq+40]
> > +    movq          m6, [inq+48]
> > +    movq          m7, [inq+56]
> > +    pmaddwd       m4, [constsq+32]
> > +    pmaddwd       m5, [constsq+40]
> > +    pmaddwd       m6, [constsq+48]
> > +    pmaddwd       m7, [constsq+56]
> > +    paddd         m0, m4
> > +    paddd         m1, m5
> > +    paddd         m2, m6
> > +    paddd         m3, m7
> > +
> > +    movq          m4, [inq+64]
> > +    movq          m5, [inq+72]
> > +    movq          m6, [inq+80]
> > +    movq          m7, [inq+88]
> > +    pmaddwd       m4, [constsq+64]
> > +    pmaddwd       m5, [constsq+72]
> > +    pmaddwd       m6, [constsq+80]
> > +    pmaddwd       m7, [constsq+88]
> > +    paddd         m0, m4
> > +    paddd         m1, m5
> > +    paddd         m2, m6
> > +    paddd         m3, m7
> > +
> > +    movq          m4, [inq+96]
> > +    movq          m5, [inq+104]
> > +    movq          m6, [inq+112]
> > +    movq          m7, [inq+120]
> > +    pmaddwd       m4, [constsq+96]
> > +    pmaddwd       m5, [constsq+104]
> > +    pmaddwd       m6, [constsq+112]
> > +    pmaddwd       m7, [constsq+120]
> > +    paddd         m0, m4
> > +    paddd         m1, m5
> > +    paddd         m2, m6
> > +    paddd         m3, m7
> > +
> > +    movq          m4, [inq+128]
> > +    movq          m5, [inq+136]
> > +    movq          m6, [inq+144]
> > +    movq          m7, [inq+152]
> > +    pmaddwd       m4, [constsq+128]
> > +    pmaddwd       m5, [constsq+136]
> > +    pmaddwd       m6, [constsq+144]
> > +    pmaddwd       m7, [constsq+152]
> > +    paddd         m0, m4
> > +    paddd         m1, m5
> > +    paddd         m2, m6
> > +    paddd         m3, m7
> > +
> > +    psrad         m0, 16    ; SBC_PROTO_FIXED8_SCALE
> > +    psrad         m1, 16    ; SBC_PROTO_FIXED8_SCALE
> > +    psrad         m2, 16    ; SBC_PROTO_FIXED8_SCALE
> > +    psrad         m3, 16    ; SBC_PROTO_FIXED8_SCALE
> > +
> > +    packssdw      m0, m0
> > +    packssdw      m1, m1
> > +    packssdw      m2, m2
> > +    packssdw      m3, m3
> > +
> > +    movq          m4, m0
> > +    movq          m5, m0
> > +    pmaddwd       m4, [constsq+160]
> > +    pmaddwd       m5, [constsq+168]
> > +
> > +    movq          m6, m1
> > +    movq          m7, m1
> > +    pmaddwd       m6, [constsq+192]
> > +    pmaddwd       m7, [constsq+200]
> > +    paddd         m4, m6
> > +    paddd         m5, m7
> > +
> > +    movq          m6, m2
> > +    movq          m7, m2
> > +    pmaddwd       m6, [constsq+224]
> > +    pmaddwd       m7, [constsq+232]
> > +    paddd         m4, m6
> > +    paddd         m5, m7
> > +
> > +    movq          m6, m3
> > +    movq          m7, m3
> > +    pmaddwd       m6, [constsq+256]
> > +    pmaddwd       m7, [constsq+264]
> > +    paddd         m4, m6
> > +    paddd         m5, m7
> > +
> > +    movq          [outq  ], m4
> > +    movq          [outq+8], m5
> > +
> > +    movq          m5, m0
> > +    pmaddwd       m0, [constsq+176]
> > +    pmaddwd       m5, [constsq+184]
> > +
> > +    movq          m7, m1
> > +    pmaddwd       m1, [constsq+208]
> > +    pmaddwd       m7, [constsq+216]
> > +    paddd         m0, m1
> > +    paddd         m5, m7
> > +
> > +    movq          m7, m2
> > +    pmaddwd       m2, [constsq+240]
> > +    pmaddwd       m7, [constsq+248]
> > +    paddd         m0, m2
> > +    paddd         m5, m7
> > +
> > +    movq          m7, m3
> > +    pmaddwd       m3, [constsq+272]
> > +    pmaddwd       m7, [constsq+280]
> > +    paddd         m0, m3
> > +    paddd         m5, m7
> > +
> 
> 
> Has the person writing the SIMD seriously not heard of loops?

I guess this person actually did loop unrolling on purpose.

> I see no reason for this to not work on larger registers if loops were used
> here.
> This seems trivial to do properly, so if you can't be bothered to fix it
> leave it to me or jamrial to do after the core of the encoder has been
> merged.

I will leave it to you.

> > +    movq          [outq+16], m0
> > +    movq          [outq+24], m5
> > +
> > +    RET
> > +
> > +
> > +;*******************************************************************
> > +;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
> > +;                              uint32_t scale_factor[2][8],
> > +;                              int blocks, int channels, int subbands)
> > +;*******************************************************************
> > +INIT_MMX mmx
> > +cglobal sbc_calc_scalefactors, 5, 9, 3, sb_sample_f, scale_factor,
> > blocks, channels, subbands, ch, sb, sa, sf, blk
> > +    shl           channelsd, 5
> > +    mov           chq, 0
> > +.loop_1:
> > +    lea           saq, [sb_sample_fq + chq]
> > +    lea           sfq, [scale_factorq + chq]
> > +
> > +    mov           sbd, 0
> > +.loop_2:
> > +    ; blk = (blocks - 1) * 64;
> > +    lea           blkq, [blocksq - 1]
> > +    shl           blkd, 6
> > +
> > +    movq          m0, [scale_mask]
> > +.loop_3:
> > +    movq          m1, [saq+blkq]
> > +    pxor          m2, m2
> > +    pcmpgtd       m1, m2
> > +    paddd         m1, [saq+blkq]
> > +    pcmpgtd       m2, m1
> > +    pxor          m1, m2
> > +
> > +    por           m0, m1
> > +
> > +    sub           blkd, 64
> > +    jns           .loop_3
> > +
> > +    movd          blkd, m0
> > +    psrlq         m0,   32
> > +    bsr           blkd, blkd
> > +    sub           blkd, 15    ; SCALE_OUT_BITS
> > +    mov           [sfq], blkd
> > +
> > +    movd          blkd, m0
> > +    bsr           blkd, blkd
> > +    sub           blkd, 15    ; SCALE_OUT_BITS
> > +    mov           [sfq+4], blkd
> > +
> > +    add           saq, 8
> > +    add           sfq, 8
> > +
> > +    add           sbd, 2
> > +    cmp           sbd, subbandsd
> > +    jl            .loop_2
> > +
> > +    add           chd, 32
> > +    cmp           chd, channelsd
> > +    jl            .loop_1
> > +
> >
> 
> This function's hardly doing SIMD and I would like to see comparison to the
> C version before accepting it. I somehow doubt it'll be faster.

It is actually slightly faster.
Here is the best speed I get encoding one file with default settings.
C version:    speed= 723x
MMX version:  speed= 756x

> > +av_cold void ff_sbcdsp_init_x86(SBCDSPContext *s)
> > +{
> > +    int cpu_flags = av_get_cpu_flags();
> > +
> > +    if (EXTERNAL_MMX(cpu_flags)) {
> > +        s->sbc_analyze_4 = ff_sbc_analyze_4_mmx;
> > +        s->sbc_analyze_8 = ff_sbc_analyze_8_mmx;
> > +        s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_mmx;
> > +    }
> > +}
> >
> 
> 
> MMX? In this day and age?

Well, this code is not very recent...
But throwing some AVX in there would probably be nice if you feel
inclined.

> Anyway, it's mostly not bad, will need some work before it's cleaned of
> libsbc's NIH.

Should be better in the patchset that I will send soon.
Aurelien Jacobs Dec. 17, 2017, 9:43 p.m. UTC | #7
On Mon, Nov 06, 2017 at 04:22:30AM +0100, Michael Niedermayer wrote:
> Hi 
> 
> On Mon, Nov 06, 2017 at 12:35:18AM +0100, Aurelien Jacobs wrote:
> > This was originally based on libsbc, and was fully integrated into ffmpeg.
> > ---
> >  doc/general.texi                 |   2 +
> >  libavcodec/Makefile              |   4 +
> >  libavcodec/allcodecs.c           |   2 +
> >  libavcodec/arm/Makefile          |   3 +
> >  libavcodec/arm/sbcdsp_armv6.S    | 245 ++++++++++++++
> >  libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
> >  libavcodec/arm/sbcdsp_neon.S     | 714 +++++++++++++++++++++++++++++++++++++++
> >  libavcodec/avcodec.h             |   2 +
> >  libavcodec/codec_desc.c          |  12 +
> >  libavcodec/sbc.c                 | 316 +++++++++++++++++
> >  libavcodec/sbc.h                 | 121 +++++++
> >  libavcodec/sbcdec.c              | 469 +++++++++++++++++++++++++
> >  libavcodec/sbcdec_data.c         | 127 +++++++
> >  libavcodec/sbcdec_data.h         |  44 +++
> >  libavcodec/sbcdsp.c              | 569 +++++++++++++++++++++++++++++++
> >  libavcodec/sbcdsp.h              |  86 +++++
> >  libavcodec/sbcdsp_data.c         | 335 ++++++++++++++++++
> >  libavcodec/sbcdsp_data.h         |  57 ++++
> >  libavcodec/sbcenc.c              | 461 +++++++++++++++++++++++++
> >  libavcodec/x86/Makefile          |   2 +
> >  libavcodec/x86/sbcdsp.asm        | 290 ++++++++++++++++
> >  libavcodec/x86/sbcdsp_init.c     |  51 +++
> >  22 files changed, 4017 insertions(+)
> >  create mode 100644 libavcodec/arm/sbcdsp_armv6.S
> >  create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
> >  create mode 100644 libavcodec/arm/sbcdsp_neon.S
> >  create mode 100644 libavcodec/sbc.c
> >  create mode 100644 libavcodec/sbc.h
> >  create mode 100644 libavcodec/sbcdec.c
> >  create mode 100644 libavcodec/sbcdec_data.c
> >  create mode 100644 libavcodec/sbcdec_data.h
> >  create mode 100644 libavcodec/sbcdsp.c
> >  create mode 100644 libavcodec/sbcdsp.h
> >  create mode 100644 libavcodec/sbcdsp_data.c
> >  create mode 100644 libavcodec/sbcdsp_data.h
> >  create mode 100644 libavcodec/sbcenc.c
> >  create mode 100644 libavcodec/x86/sbcdsp.asm
> >  create mode 100644 libavcodec/x86/sbcdsp_init.c
> 
> this seems to fail to build on x86-32

Ooops... Haven't used x86-32 for so long that it didn't even occur to
me to test it.

>         libavcodec/x86/sbcdsp_init.o
> src/libavcodec/x86/sbcdsp.asm:251: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:264: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:267: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:269: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:270: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:271: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:273: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:274: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:275: error: invalid operands in non-64-bit mode
> src/libavcodec/x86/sbcdsp.asm:276: error: invalid operands in non-64-bit mode
> STRIP   libavcodec/x86/opus_pvq_search.o

Fixed in upcoming patchset.
Aurelien Jacobs Dec. 17, 2017, 9:44 p.m. UTC | #8
On Mon, Nov 06, 2017 at 12:53:38PM -0300, James Almer wrote:
> On 11/5/2017 8:35 PM, Aurelien Jacobs wrote:
> > This was originally based on libsbc, and was fully integrated into ffmpeg.
> > ---
> >  doc/general.texi                 |   2 +
> >  libavcodec/Makefile              |   4 +
> >  libavcodec/allcodecs.c           |   2 +
> >  libavcodec/arm/Makefile          |   3 +
> >  libavcodec/arm/sbcdsp_armv6.S    | 245 ++++++++++++++
> >  libavcodec/arm/sbcdsp_init_arm.c | 105 ++++++
> >  libavcodec/arm/sbcdsp_neon.S     | 714 +++++++++++++++++++++++++++++++++++++++
> >  libavcodec/avcodec.h             |   2 +
> >  libavcodec/codec_desc.c          |  12 +
> >  libavcodec/sbc.c                 | 316 +++++++++++++++++
> >  libavcodec/sbc.h                 | 121 +++++++
> >  libavcodec/sbcdec.c              | 469 +++++++++++++++++++++++++
> >  libavcodec/sbcdec_data.c         | 127 +++++++
> >  libavcodec/sbcdec_data.h         |  44 +++
> >  libavcodec/sbcdsp.c              | 569 +++++++++++++++++++++++++++++++
> >  libavcodec/sbcdsp.h              |  86 +++++
> >  libavcodec/sbcdsp_data.c         | 335 ++++++++++++++++++
> >  libavcodec/sbcdsp_data.h         |  57 ++++
> >  libavcodec/sbcenc.c              | 461 +++++++++++++++++++++++++
> >  libavcodec/x86/Makefile          |   2 +
> >  libavcodec/x86/sbcdsp.asm        | 290 ++++++++++++++++
> >  libavcodec/x86/sbcdsp_init.c     |  51 +++
> >  22 files changed, 4017 insertions(+)
> >  create mode 100644 libavcodec/arm/sbcdsp_armv6.S
> >  create mode 100644 libavcodec/arm/sbcdsp_init_arm.c
> >  create mode 100644 libavcodec/arm/sbcdsp_neon.S
> >  create mode 100644 libavcodec/sbc.c
> >  create mode 100644 libavcodec/sbc.h
> >  create mode 100644 libavcodec/sbcdec.c
> >  create mode 100644 libavcodec/sbcdec_data.c
> >  create mode 100644 libavcodec/sbcdec_data.h
> >  create mode 100644 libavcodec/sbcdsp.c
> >  create mode 100644 libavcodec/sbcdsp.h
> >  create mode 100644 libavcodec/sbcdsp_data.c
> >  create mode 100644 libavcodec/sbcdsp_data.h
> >  create mode 100644 libavcodec/sbcenc.c
> >  create mode 100644 libavcodec/x86/sbcdsp.asm
> >  create mode 100644 libavcodec/x86/sbcdsp_init.c
> 
> This needs to be split into at least four patches.
> One to add the decoder (plus codec ID, descriptor and such things), one
> to add the encoder (and the dsp framework), one to add the x86 assembly
> optimizations for the encoder, and one for the arm optimizations.

OK. New patchset is split this way.
diff mbox

Patch

diff --git a/doc/general.texi b/doc/general.texi
index 9e6ae13435..baaa308dcf 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -1096,6 +1096,8 @@  following image formats are supported:
     @tab Real low bitrate AC-3 codec
 @item RealAudio Lossless     @tab     @tab  X
 @item RealAudio SIPR / ACELP.NET @tab     @tab  X
+@item SBC (low-complexity subband codec) @tab  X  @tab  X
+    @tab Used in Bluetooth A2DP
 @item Shorten                @tab     @tab  X
 @item Sierra VMD audio       @tab     @tab  X
     @tab Used in Sierra VMD files.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3a33361f33..17648a1c3d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -576,6 +576,10 @@  OBJS-$(CONFIG_SUBVIEWER_DECODER)       += subviewerdec.o ass.o
 OBJS-$(CONFIG_SUNRAST_DECODER)         += sunrast.o
 OBJS-$(CONFIG_SUNRAST_ENCODER)         += sunrastenc.o
 OBJS-$(CONFIG_LIBRSVG_DECODER)         += librsvgdec.o
+OBJS-$(CONFIG_SBC_DECODER)             += sbcdec.o sbcdec_data.o sbc.o
+OBJS-$(CONFIG_SBC_ENCODER)             += sbcenc.o sbc.o sbcdsp.o sbcdsp_data.o
+OBJS-$(CONFIG_MSBC_DECODER)            += sbcdec.o sbcdec_data.o sbc.o
+OBJS-$(CONFIG_MSBC_ENCODER)            += sbcenc.o sbc.o sbcdsp.o sbcdsp_data.o
 OBJS-$(CONFIG_SVQ1_DECODER)            += svq1dec.o svq1.o svq13.o h263data.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o  h263data.o  \
                                           h263.o ituh263enc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 98655ddd7c..95cf67ce20 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -452,6 +452,7 @@  static void register_all(void)
     REGISTER_DECODER(MP3ON4FLOAT,       mp3on4float);
     REGISTER_DECODER(MPC7,              mpc7);
     REGISTER_DECODER(MPC8,              mpc8);
+    REGISTER_ENCDEC (MSBC,              msbc);
     REGISTER_ENCDEC (NELLYMOSER,        nellymoser);
     REGISTER_DECODER(ON2AVC,            on2avc);
     REGISTER_ENCDEC (OPUS,              opus);
@@ -465,6 +466,7 @@  static void register_all(void)
     REGISTER_DECODER(SHORTEN,           shorten);
     REGISTER_DECODER(SIPR,              sipr);
     REGISTER_DECODER(SMACKAUD,          smackaud);
+    REGISTER_ENCDEC (SBC,               sbc);
     REGISTER_ENCDEC (SONIC,             sonic);
     REGISTER_ENCODER(SONIC_LS,          sonic_ls);
     REGISTER_DECODER(TAK,               tak);
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 1eeac5449e..fd2401f4e5 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -42,6 +42,7 @@  OBJS-$(CONFIG_DCA_DECODER)             += arm/synth_filter_init_arm.o
 OBJS-$(CONFIG_HEVC_DECODER)            += arm/hevcdsp_init_arm.o
 OBJS-$(CONFIG_MLP_DECODER)             += arm/mlpdsp_init_arm.o
 OBJS-$(CONFIG_RV40_DECODER)            += arm/rv40dsp_init_arm.o
+OBJS-$(CONFIG_SBC_ENCODER)             += arm/sbcdsp_init_arm.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += arm/vorbisdsp_init_arm.o
 OBJS-$(CONFIG_VP6_DECODER)             += arm/vp6dsp_init_arm.o
 OBJS-$(CONFIG_VP9_DECODER)             += arm/vp9dsp_init_10bpp_arm.o   \
@@ -81,6 +82,7 @@  ARMV6-OBJS-$(CONFIG_VP8DSP)            += arm/vp8_armv6.o               \
 
 # decoders/encoders
 ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
+ARMV6-OBJS-$(CONFIG_SBC_ENCODER)       += arm/sbcdsp_armv6.o
 
 
 # VFP optimizations
@@ -140,6 +142,7 @@  NEON-OBJS-$(CONFIG_HEVC_DECODER)       += arm/hevcdsp_init_neon.o       \
 NEON-OBJS-$(CONFIG_RV30_DECODER)       += arm/rv34dsp_neon.o
 NEON-OBJS-$(CONFIG_RV40_DECODER)       += arm/rv34dsp_neon.o            \
                                           arm/rv40dsp_neon.o
+NEON-OBJS-$(CONFIG_SBC_ENCODER)        += arm/sbcdsp_neon.o
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)     += arm/vorbisdsp_neon.o
 NEON-OBJS-$(CONFIG_VP6_DECODER)        += arm/vp6dsp_neon.o
 NEON-OBJS-$(CONFIG_VP9_DECODER)        += arm/vp9itxfm_16bpp_neon.o     \
diff --git a/libavcodec/arm/sbcdsp_armv6.S b/libavcodec/arm/sbcdsp_armv6.S
new file mode 100644
index 0000000000..f1ff845798
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_armv6.S
@@ -0,0 +1,245 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline.
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_sbc_analyze_4_armv6, export=1
+        @ r0 = in (int16_t *), r1 = out (int32_t *), r2 = consts (int16_t *)
+        push            {r1, r3-r7, lr}          @ r1 is saved too and restored on exit
+        push            {r8-r12, r14}
+        ldrd            r4,  r5,  [r0, #0]
+        ldrd            r6,  r7,  [r2, #0]
+        ldrd            r8,  r9,  [r0, #16]
+        ldrd            r10, r11, [r2, #16]
+        mov             r14, #0x8000             @ 1 << 15: rounding bias for the "asr #16" below
+        smlad           r3,  r4,  r6,  r14       @ accumulators start from the rounding bias
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #32]
+        ldrd            r6,  r7,  [r2, #32]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #48]
+        ldrd            r10, r11, [r2, #48]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #64]
+        ldrd            r6,  r7,  [r2, #64]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #8]
+        ldrd            r10, r11, [r2, #8]
+        smlad           r3,  r4,  r6,  r3        @ t1[0] is done
+        smlad           r12, r5,  r7,  r12       @ t1[1] is done
+        ldrd            r4,  r5,  [r0, #24]
+        ldrd            r6,  r7,  [r2, #24]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
+        smlad           r12, r8,  r10, r14       @ second pair of accumulators, again seeded with the bias
+        smlad           r14, r9,  r11, r14       @ r14 (bias) is consumed here and becomes an accumulator
+        ldrd            r8,  r9,  [r0, #40]
+        ldrd            r10, r11, [r2, #40]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #56]
+        ldrd            r6,  r7,  [r2, #56]
+        smlad           r12, r8,  r10, r12
+        smlad           r14, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #72]
+        ldrd            r10, r11, [r2, #72]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r2, #80]      @ start loading cos table
+        smlad           r12, r8,  r10, r12       @ t1[2] is done
+        smlad           r14, r9,  r11, r14       @ t1[3] is done
+        ldrd            r6,  r7,  [r2, #88]
+        ldrd            r8,  r9,  [r2, #96]
+        ldrd            r10, r11, [r2, #104]     @ cos table fully loaded
+        pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
+        smuad           r4,  r3,  r4             @ second stage: packed t1[0:1] times table words
+        smuad           r5,  r3,  r5
+        smlad           r4,  r12, r8,  r4        @ ... plus packed t1[2:3] times table words
+        smlad           r5,  r12, r9,  r5
+        smuad           r6,  r3,  r6
+        smuad           r7,  r3,  r7
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        pop             {r8-r12, r14}
+        stmia           r1, {r4, r5, r6, r7}     @ write the four 32-bit results to out
+        pop             {r1, r3-r7, pc}          @ restore registers and return
+endfunc
+
+function ff_sbc_analyze_8_armv6, export=1
+        @ r0 = in (int16_t *), r1 = out (int32_t *), r2 = consts (int16_t *)
+        push            {r1, r3-r7, lr}          @ r1 is saved too and restored on exit
+        push            {r8-r12, r14}
+        ldrd            r4,  r5,  [r0, #24]
+        ldrd            r6,  r7,  [r2, #24]
+        ldrd            r8,  r9,  [r0, #56]
+        ldrd            r10, r11, [r2, #56]
+        mov             r14, #0x8000             @ 1 << 15: rounding bias for the "asr #16" below
+        smlad           r3,  r4,  r6,  r14       @ accumulators start from the rounding bias
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #88]
+        ldrd            r6,  r7,  [r2, #88]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #120]
+        ldrd            r10, r11, [r2, #120]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #152]
+        ldrd            r6,  r7,  [r2, #152]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #16]
+        ldrd            r10, r11, [r2, #16]
+        smlad           r3,  r4,  r6,  r3        @ t1[6] is done
+        smlad           r12, r5,  r7,  r12       @ t1[7] is done
+        ldrd            r4,  r5,  [r0, #48]
+        ldrd            r6,  r7,  [r2, #48]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[6] and t1[7]
+        str             r3,  [sp, #-4]!          @ save to stack
+        smlad           r3,  r8,  r10, r14
+        smlad           r12, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #80]
+        ldrd            r10, r11, [r2, #80]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #112]
+        ldrd            r6,  r7,  [r2, #112]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #144]
+        ldrd            r10, r11, [r2, #144]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #0]
+        ldrd            r6,  r7,  [r2, #0]
+        smlad           r3,  r8,  r10, r3        @ t1[4] is done
+        smlad           r12, r9,  r11, r12       @ t1[5] is done
+        ldrd            r8,  r9,  [r0, #32]
+        ldrd            r10, r11, [r2, #32]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[4] and t1[5]
+        str             r3,  [sp, #-4]!          @ save to stack
+        smlad           r3,  r4,  r6,  r14
+        smlad           r12, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #64]
+        ldrd            r6,  r7,  [r2, #64]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #96]
+        ldrd            r10, r11, [r2, #96]
+        smlad           r3,  r4,  r6,  r3
+        smlad           r12, r5,  r7,  r12
+        ldrd            r4,  r5,  [r0, #128]
+        ldrd            r6,  r7,  [r2, #128]
+        smlad           r3,  r8,  r10, r3
+        smlad           r12, r9,  r11, r12
+        ldrd            r8,  r9,  [r0, #8]
+        ldrd            r10, r11, [r2, #8]
+        smlad           r3,  r4,  r6,  r3        @ t1[0] is done
+        smlad           r12, r5,  r7,  r12       @ t1[1] is done
+        ldrd            r4,  r5,  [r0, #40]
+        ldrd            r6,  r7,  [r2, #40]
+        pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
+        smlad           r12, r8,  r10, r14
+        smlad           r14, r9,  r11, r14       @ r14 (bias) is consumed here and becomes an accumulator
+        ldrd            r8,  r9,  [r0, #72]
+        ldrd            r10, r11, [r2, #72]
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r0, #104]
+        ldrd            r6,  r7,  [r2, #104]
+        smlad           r12, r8,  r10, r12
+        smlad           r14, r9,  r11, r14
+        ldrd            r8,  r9,  [r0, #136]
+        ldrd            r10, r11, [r2, #136]!    @ writeback: r2 = consts + 136 from here on
+        smlad           r12, r4,  r6,  r12
+        smlad           r14, r5,  r7,  r14
+        ldrd            r4,  r5,  [r2, #(160 - 136 + 0)]    @ i.e. consts + 160, relative to the updated r2
+        smlad           r12, r8,  r10, r12       @ t1[2] is done
+        smlad           r14, r9,  r11, r14       @ t1[3] is done
+        ldrd            r6,  r7,  [r2, #(160 - 136 + 8)]
+        smuad           r4,  r3,  r4
+        smuad           r5,  r3,  r5
+        pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
+                                                 @ r3  = t2[0:1]
+                                                 @ r12 = t2[2:3]
+        pop             {r0, r14}                @ t2[4:5], t2[6:7]
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 32)]
+        smuad           r6,  r3,  r6
+        smuad           r7,  r3,  r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 40)]
+        smlad           r4,  r12, r8,  r4
+        smlad           r5,  r12, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 64)]
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 72)]
+        smlad           r4,  r0,  r8,  r4
+        smlad           r5,  r0,  r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 96)]
+        smlad           r6,  r0,  r10, r6
+        smlad           r7,  r0,  r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 104)]
+        smlad           r4,  r14, r8,  r4
+        smlad           r5,  r14, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 0)]
+        smlad           r6,  r14, r10, r6
+        smlad           r7,  r14, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 8)]
+        stmia           r1!, {r4, r5}            @ store the first two 32-bit results, advance out
+        smuad           r4,  r3,  r8
+        smuad           r5,  r3,  r9
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 32)]
+        stmia           r1!, {r6, r7}            @ store the next two results, advance out
+        smuad           r6,  r3,  r10
+        smuad           r7,  r3,  r11
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 40)]
+        smlad           r4,  r12, r8,  r4
+        smlad           r5,  r12, r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 64)]
+        smlad           r6,  r12, r10, r6
+        smlad           r7,  r12, r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 72)]
+        smlad           r4,  r0,  r8,  r4
+        smlad           r5,  r0,  r9,  r5
+        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 96)]
+        smlad           r6,  r0,  r10, r6
+        smlad           r7,  r0,  r11, r7
+        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 104)]
+        smlad           r4,  r14, r8,  r4
+        smlad           r5,  r14, r9,  r5
+        smlad           r6,  r14, r10, r6
+        smlad           r7,  r14, r11, r7
+        pop             {r8-r12, r14}
+        stmia           r1!, {r4, r5, r6, r7}    @ store the last four results
+        pop             {r1, r3-r7, pc}          @ restore registers (r1 reloaded from the stack) and return
+endfunc
diff --git a/libavcodec/arm/sbcdsp_init_arm.c b/libavcodec/arm/sbcdsp_init_arm.c
new file mode 100644
index 0000000000..6bf7e729ef
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_init_arm.c
@@ -0,0 +1,105 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARM (ARMv6 and NEON) optimization setup for some basic "building bricks"
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/sbcdsp.h"
+
+void ff_sbc_analyze_4_armv6(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_armv6(const int16_t *in, int32_t *out, const int16_t *consts);
+/* NEON versions of the analysis filters, plus further NEON-only routines */
+void ff_sbc_analyze_4_neon(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_neon(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_calc_scalefactors_neon(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int channels, int subbands);
+int ff_sbc_calc_scalefactors_j_neon(int32_t sb_sample_f[16][2][8],
+                                    uint32_t scale_factor[2][8],
+                                    int blocks, int subbands); /* returns 'joint' */
+int ff_sbc_enc_process_input_4s_neon(int position, const uint8_t *pcm,
+                                     int16_t X[2][SBC_X_BUFFER_SIZE],
+                                     int nsamples, int nchannels); /* returns new position */
+int ff_sbc_enc_process_input_8s_neon(int position, const uint8_t *pcm,
+                                     int16_t X[2][SBC_X_BUFFER_SIZE],
+                                     int nsamples, int nchannels); /* returns new position */
+
+DECLARE_ALIGNED(SBC_ALIGN, int32_t, ff_sbcdsp_joint_bits_mask)[8] = { /* per-subband bits of 'joint' */
+    8,   4,  2,  1, 128, 64, 32, 16 /* read four entries at a time by ff_sbc_calc_scalefactors_j_neon */
+};
+
+#if HAVE_BIGENDIAN /* PERM(a, b, c, d): the 8 byte indices of 16-bit elements a..d */
+#define PERM(a, b, c, d) {            \
+        ((a) * 2) + 1, ((a) * 2) + 0, \
+        ((b) * 2) + 1, ((b) * 2) + 0, \
+        ((c) * 2) + 1, ((c) * 2) + 0, \
+        ((d) * 2) + 1, ((d) * 2) + 0  \
+    }
+#else /* same indices, with the two bytes of every element in the other order */
+#define PERM(a, b, c, d) {            \
+        ((a) * 2) + 0, ((a) * 2) + 1, \
+        ((b) * 2) + 0, ((b) * 2) + 1, \
+        ((c) * 2) + 0, ((c) * 2) + 1, \
+        ((d) * 2) + 0, ((d) * 2) + 1  \
+    }
+#endif /* HAVE_BIGENDIAN */
+
+DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_4)[2][8] = { /* vtbl.8 indices used by ff_sbc_enc_process_input_4s_neon */
+    PERM(7, 3, 6, 4),
+    PERM(0, 2, 1, 5)
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_8)[4][8] = { /* vtbl.8 indices used by ff_sbc_enc_process_input_8s_neon */
+    PERM(15, 7, 14,  8),
+    PERM(13, 9, 12, 10),
+    PERM(11, 3,  6,  0),
+    PERM( 5, 1,  4,  2)
+};
+
+/* Hook up the ARMv6 / NEON SBC routines selected by the runtime CPU flags. */
+av_cold void ff_sbcdsp_init_arm(SBCDSPContext *s)
+{
+    const int flags = av_get_cpu_flags();
+
+    if (have_armv6(flags)) {
+        s->sbc_analyze_4 = ff_sbc_analyze_4_armv6;
+        s->sbc_analyze_8 = ff_sbc_analyze_8_armv6;
+    }
+    if (!have_neon(flags))
+        return;
+    s->sbc_analyze_4           = ff_sbc_analyze_4_neon; /* replaces any ARMv6 choice */
+    s->sbc_analyze_8           = ff_sbc_analyze_8_neon;
+    s->sbc_calc_scalefactors   = ff_sbc_calc_scalefactors_neon;
+    s->sbc_calc_scalefactors_j = ff_sbc_calc_scalefactors_j_neon;
+    if (s->increment != 1) { /* input processing is left untouched when increment == 1 */
+        s->sbc_enc_process_input_4s = ff_sbc_enc_process_input_4s_neon;
+        s->sbc_enc_process_input_8s = ff_sbc_enc_process_input_8s_neon;
+    }
+}
diff --git a/libavcodec/arm/sbcdsp_neon.S b/libavcodec/arm/sbcdsp_neon.S
new file mode 100644
index 0000000000..d83d21d202
--- /dev/null
+++ b/libavcodec/arm/sbcdsp_neon.S
@@ -0,0 +1,714 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC ARM NEON optimizations
+ */
+
+#include "libavutil/arm/asm.S"
+#include "neon.S"
+
+#define SBC_PROTO_FIXED_SCALE 16
+
+function ff_sbc_analyze_4_neon, export=1
+        /* TODO: merge even and odd cases (or even merge all four calls to this
+         * function) in order to have only aligned reads from 'in' array
+         * and reduce number of load instructions */
+        vld1.16         {d4, d5}, [r0, :64]!        /* r0 = in: 8 samples per load */
+        vld1.16         {d20, d21}, [r2, :128]!     /* r2 = consts: 8 coefficients per load */
+        /* d8-d15 are callee-saved (AAPCS): scratch is kept to d0-d7 and d16-d31 */
+        vmull.s16       q0, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmull.s16       q1, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+
+        vmlal.s16       q0, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q1, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q0, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q1, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+
+        vmlal.s16       q0, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q1, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q0, d4, d20
+        vmlal.s16       q1, d5, d21
+        /* q0/q1 hold 8 partial sums of 5 products each: add them pairwise */
+        vpadd.s32       d0, d0, d1
+        vpadd.s32       d1, d2, d3
+
+        vrshrn.s32      d0, q0, SBC_PROTO_FIXED_SCALE /* round, scale, narrow to 16 bit */
+
+        vld1.16         {d2, d3, d4, d5}, [r2, :128]! /* 16 coefficients of the second stage */
+
+        vdup.i32        d1, d0[1]  /* TODO: can be eliminated */
+        vdup.i32        d0, d0[0]  /* TODO: can be eliminated */
+
+        vmull.s16       q3, d2, d0
+        vmull.s16       q8, d3, d0
+        vmlal.s16       q3, d4, d1
+        vmlal.s16       q8, d5, d1
+
+        vpadd.s32       d0, d6, d7 /* TODO: can be eliminated */
+        vpadd.s32       d1, d16, d17 /* TODO: can be eliminated */
+
+        vst1.32         {d0, d1}, [r1, :128]        /* r1 = out: four 32-bit results */
+
+        bx              lr
+endfunc
+
+function ff_sbc_analyze_8_neon, export=1
+        /* TODO: merge even and odd cases (or even merge all four calls to this
+         * function) in order to have only aligned reads from 'in' array
+         * and reduce number of load instructions */
+        vld1.16         {d4, d5}, [r0, :64]!        /* r0 = in: 8 samples per load */
+        vld1.16         {d20, d21}, [r2, :128]!     /* r2 = consts: 8 coefficients per load */
+        /* d8-d15 are callee-saved (AAPCS): scratch is kept to d0-d7 and d16-d31 */
+        vmull.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmull.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+        vmull.s16       q8, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmull.s16       q9, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+        vmlal.s16       q8, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+        vmlal.s16       q8, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+        vmlal.s16       q8, d6, d22
+        vld1.16         {d4, d5}, [r0, :64]!
+        vmlal.s16       q9, d7, d23
+        vld1.16         {d20, d21}, [r2, :128]!
+
+        vmlal.s16       q12, d4, d20
+        vld1.16         {d6,  d7}, [r0, :64]!
+        vmlal.s16       q13, d5, d21
+        vld1.16         {d22, d23}, [r2, :128]!
+
+        vmlal.s16       q8, d6, d22
+        vmlal.s16       q9, d7, d23
+        /* q12, q13, q8, q9 hold 16 partial sums: add them pairwise into q0/q1 */
+        vpadd.s32       d0, d24, d25
+        vpadd.s32       d1, d26, d27
+        vpadd.s32       d2, d16, d17
+        vpadd.s32       d3, d18, d19
+
+        vrshr.s32       q0, q0, SBC_PROTO_FIXED_SCALE /* round and scale ... */
+        vrshr.s32       q1, q1, SBC_PROTO_FIXED_SCALE
+        vmovn.s32       d0, q0                        /* ... then narrow to 16 bit */
+        vmovn.s32       d1, q1
+
+        vdup.i32        d3, d1[1]  /* TODO: can be eliminated */
+        vdup.i32        d2, d1[0]  /* TODO: can be eliminated */
+        vdup.i32        d1, d0[1]  /* TODO: can be eliminated */
+        vdup.i32        d0, d0[0]  /* TODO: can be eliminated */
+
+        vld1.16         {d4, d5}, [r2, :128]!       /* second stage: 64 more coefficients */
+        vmull.s16       q12, d4, d0
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmull.s16       q13, d5, d0
+        vmull.s16       q8, d6, d0
+        vmull.s16       q9, d7, d0
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q12, d4, d1
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q13, d5, d1
+        vmlal.s16       q8, d6, d1
+        vmlal.s16       q9, d7, d1
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q12, d4, d2
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q13, d5, d2
+        vmlal.s16       q8, d6, d2
+        vmlal.s16       q9, d7, d2
+
+        vld1.16         {d4, d5}, [r2, :128]!
+        vmlal.s16       q12, d4, d3
+        vld1.16         {d6, d7}, [r2, :128]!
+        vmlal.s16       q13, d5, d3
+        vmlal.s16       q8, d6, d3
+        vmlal.s16       q9, d7, d3
+
+        vpadd.s32       d0, d24, d25 /* TODO: can be eliminated */
+        vpadd.s32       d1, d26, d27 /* TODO: can be eliminated */
+        vpadd.s32       d2, d16, d17 /* TODO: can be eliminated */
+        vpadd.s32       d3, d18, d19 /* TODO: can be eliminated */
+
+        vst1.32         {d0, d1, d2, d3}, [r1, :128] /* r1 = out: eight 32-bit results */
+
+        bx              lr
+endfunc
+
+function ff_sbc_calc_scalefactors_neon, export=1
+        @ parameters (scale factors are computed for four subbands at a time)
+        @ r0 = sb_sample_f
+        @ r1 = scale_factor
+        @ r2 = blocks
+        @ r3 = channels
+        @ r4 = subbands (fifth argument, fetched from the stack below)
+        @ local variables
+        @ r5 = in_loop_1 (start of the current channel inside sb_sample_f)
+        @ r6 = in (read pointer, advanced by inc after every block)
+        @ r7 = out_loop_1 (start of the current channel inside scale_factor)
+        @ r8 = out
+        @ r9 = ch
+        @ r10 = sb
+        @ r11 = inc (byte distance between two consecutive blocks)
+        @ r12 = blk
+
+        push            {r1-r2, r4-r12}
+        ldr             r4,  [sp, #44]          @ 11 registers pushed = 44 bytes
+        mov             r11, #64
+
+        mov             r9,  #0
+1:                                              @ loop over channels
+        add             r5,  r0,  r9, lsl#5     @ 32 bytes per channel
+        add             r7,  r1,  r9, lsl#5
+
+        mov             r10,  #0
+2:                                              @ loop over groups of four subbands
+        add             r6,  r5,  r10, lsl#2
+        add             r8,  r7,  r10, lsl#2
+        mov             r12, r2
+
+        vmov.s32        q0,  #0
+        vmov.s32        q1,  #0x8000            @ 1 << SCALE_OUT_BITS
+        vmov.s32        q14, #1
+        vmov.s32        q15, #16                @ 31 - SCALE_OUT_BITS
+        vadd.s32        q1,  q1,  q14           @ running maximum starts at 0x8001
+3:                                              @ loop over blocks, four per pass
+        vld1.32         {d16, d17}, [r6, :128], r11
+        vabs.s32        q8,  q8
+        vld1.32         {d18, d19}, [r6, :128], r11
+        vabs.s32        q9,  q9
+        vld1.32         {d20, d21}, [r6, :128], r11
+        vabs.s32        q10, q10
+        vld1.32         {d22, d23}, [r6, :128], r11
+        vabs.s32        q11, q11
+        vmax.s32        q0,  q0,  q8
+        vmax.s32        q1,  q1,  q9
+        vmax.s32        q0,  q0,  q10
+        vmax.s32        q1,  q1,  q11
+        subs            r12, r12, #4
+        bgt             3b
+        vmax.s32        q0,  q0,  q1            @ largest magnitude per subband
+        vsub.s32        q0,  q0,  q14
+        vclz.s32        q0,  q0
+        vsub.s32        q0,  q15, q0            @ 16 - clz(max - 1)
+        vst1.32         {d0, d1}, [r8, :128]    @ four scale factors written at once
+
+        add             r10, r10, #4
+        cmp             r10, r4
+        blt             2b
+
+        add             r9,  r9,  #1
+        cmp             r9,  r3
+        blt             1b
+
+        pop             {r1-r2, r4-r12}
+        bx              lr
+endfunc
+
+/*
+ * constants: q13 = (31 - SCALE_OUT_BITS)
+ *            q14 = 1; r2 (blocks) and r11 (inc) are read, r3 is clobbered
+ * input:     q0  - ((1 << SCALE_OUT_BITS) + 1)
+ *            r5  - samples for channel 0
+ *            r6  - samples for channel 1
+ * output:    q0, q1 - scale factors without joint stereo
+ *            q2, q3 - scale factors with joint stereo
+ *            (q15, the joint stereo selection mask, is derived by the caller)
+ */
+.macro calc_scalefactors
+        vmov.s32        q1,  q0
+        vmov.s32        q2,  q0
+        vmov.s32        q3,  q0
+        mov             r3,  r2
+1:      /* one block of four subbands per pass; r5 and r6 advance by r11 */
+        vld1.32         {d18, d19}, [r6, :128], r11
+        vbic.s32        q11, q9,  q14           /* channel 1 with bit 0 cleared */
+        vld1.32         {d16, d17}, [r5, :128], r11
+        vhadd.s32       q10, q8,  q11           /* halved sum of both channels */
+        vhsub.s32       q11, q8,  q11           /* halved difference of both channels */
+        vabs.s32        q8,  q8
+        vabs.s32        q9,  q9
+        vabs.s32        q10, q10
+        vabs.s32        q11, q11
+        vmax.s32        q0,  q0,  q8
+        vmax.s32        q1,  q1,  q9
+        vmax.s32        q2,  q2,  q10
+        vmax.s32        q3,  q3,  q11
+        subs            r3,  r3,  #1
+        bgt             1b
+        vsub.s32        q0,  q0,  q14
+        vsub.s32        q1,  q1,  q14
+        vsub.s32        q2,  q2,  q14
+        vsub.s32        q3,  q3,  q14
+        vclz.s32        q0,  q0
+        vclz.s32        q1,  q1
+        vclz.s32        q2,  q2
+        vclz.s32        q3,  q3
+        vsub.s32        q0,  q13, q0            /* q13 - clz(max - 1) */
+        vsub.s32        q1,  q13, q1
+        vsub.s32        q2,  q13, q2
+        vsub.s32        q3,  q13, q3
+.endm
+
+/* Where q15 selects joint stereo, replace channel 0 by the halved sum and
+ * channel 1 by the halved difference. constants: q14 = 1, r10 = 0, r2 = blocks
+ * input: q15 - joint stereo selection mask
+ *        r5  - value set by calc_scalefactors macro (just past the last block)
+ *        r6  - value set by calc_scalefactors macro (just past the last block)
+ */
+.macro update_joint_stereo_samples
+        sub             r8,  r6,  r11           /* r7/r8: last block */
+        sub             r7,  r5,  r11
+        sub             r6,  r6,  r11, asl #1   /* r5/r6: block before the last one */
+        sub             r5,  r5,  r11, asl #1
+        vld1.32         {d18, d19}, [r6, :128]
+        vbic.s32        q11, q9,  q14
+        vld1.32         {d16, d17}, [r5, :128]
+        vld1.32         {d2, d3}, [r8, :128]
+        vbic.s32        q3,  q1,  q14
+        vld1.32         {d0, d1}, [r7, :128]
+        vhsub.s32       q10, q8,  q11
+        vhadd.s32       q11, q8,  q11
+        vhsub.s32       q2,  q0,  q3
+        vhadd.s32       q3,  q0,  q3
+        vbif.s32        q10, q9,  q15           /* keep the original where the mask is clear */
+        vbif.s32        d22, d16, d30
+        sub             r11, r10, r11, asl #1   /* r11 = -2 * inc: walk backwards by two blocks */
+        sub             r3,  r2,  #2
+2:      /* store the two finished blocks while loading and converting the next two */
+        vbif.s32        d23, d17, d31
+        vst1.32         {d20, d21}, [r6, :128], r11
+        vbif.s32        d4,  d2,  d30
+        vld1.32         {d18, d19}, [r6, :128]
+        vbif.s32        d5,  d3,  d31
+        vst1.32         {d22, d23}, [r5, :128], r11
+        vbif.s32        d6,  d0,  d30
+        vld1.32         {d16, d17}, [r5, :128]
+        vbif.s32        d7,  d1,  d31
+        vst1.32         {d4, d5}, [r8, :128], r11
+        vbic.s32        q11, q9,  q14
+        vld1.32         {d2, d3}, [r8, :128]
+        vst1.32         {d6, d7}, [r7, :128], r11
+        vbic.s32        q3,  q1,  q14
+        vld1.32         {d0, d1}, [r7, :128]
+        vhsub.s32       q10, q8,  q11
+        vhadd.s32       q11, q8,  q11
+        vhsub.s32       q2,  q0,  q3
+        vhadd.s32       q3,  q0,  q3
+        vbif.s32        q10, q9,  q15
+        vbif.s32        d22, d16, d30
+        subs            r3,  r3,  #2
+        bgt             2b
+        sub             r11, r10, r11, asr #1   /* restore r11 to its value on entry */
+        vbif.s32        d23, d17, d31
+        vst1.32         {d20, d21}, [r6, :128]
+        vbif.s32        q2,  q1,  q15
+        vst1.32         {d22, d23}, [r5, :128]
+        vbif.s32        q3,  q0,  q15
+        vst1.32         {d4, d5}, [r8, :128]
+        vst1.32         {d6, d7}, [r7, :128]
+.endm
+
+function ff_sbc_calc_scalefactors_j_neon, export=1
+        @ parameters (stereo only: both channels are always processed)
+        @ r0 = in = sb_sample_f
+        @ r1 = out = scale_factor
+        @ r2 = blocks
+        @ r3 = subbands
+        @ local variables
+        @ r4 = consts = ff_sbcdsp_joint_bits_mask
+        @ r5 = in0
+        @ r6 = in1
+        @ r7 = out0
+        @ r8 = out1
+        @ r10 = zero
+        @ r11 = inc
+        @ return r0 = joint
+
+        push            {r3-r11}
+        movrelx         r4,  X(ff_sbcdsp_joint_bits_mask)
+        mov             r10, #0
+        mov             r11, #64
+
+        vmov.s32        q14, #1
+        vmov.s32        q13, #16    @ 31 - SCALE_OUT_BITS
+
+        cmp             r3, #4
+        bne             8f          @ any other subband count takes the 8-subband path
+
+4:      @ 4 subbands
+        add             r5,  r0,  #0
+        add             r6,  r0,  #32   @ channel 1 starts 32 bytes after channel 0
+        add             r7,  r1,  #0
+        add             r8,  r1,  #32
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 0, 1, 2
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vmov.s32        d31[1], r10    @ last subband -> no joint
+        vld1.32         {d16, d17}, [r4, :128]!
+        vcgt.s32        q15, q15, q9   @ joint wins where its scale factor sum is smaller
+
+        @ calculate and save to memory 'joint' variable
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vpadd.s32       d16, d16, d16
+        vst1.32         {d0, d1}, [r7, :128]
+        vst1.32         {d2, d3}, [r8, :128]
+        vmov.32         r0, d16[0]
+
+        update_joint_stereo_samples
+        b               9f
+
+8:      @ 8 subbands
+        add             r5,  r0,  #16   @ start with the upper four subbands
+        add             r6,  r0,  #48
+        add             r7,  r1,  #16
+        add             r8,  r1,  #48
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 4, 5, 6
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vmov.s32        d31[1], r10    @ last subband -> no joint
+        vld1.32         {d16, d17}, [r4, :128]!
+        vcgt.s32        q15, q15, q9
+
+        @ calculate part of 'joint' variable and save it to d24
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vst1.32         {d0, d1}, [r7, :128]
+        vst1.32         {d2, d3}, [r8, :128]
+        vpadd.s32       d24, d16, d16
+
+        update_joint_stereo_samples
+
+        add             r5,  r0,  #0    @ second pass: the lower four subbands
+        add             r6,  r0,  #32
+        add             r7,  r1,  #0
+        add             r8,  r1,  #32
+        vmov.s32        q0,  #0x8000    @ 1 << SCALE_OUT_BITS
+        vadd.s32        q0,  q0,  q14
+
+        calc_scalefactors
+
+        @ check whether to use joint stereo for subbands 0, 1, 2, 3
+        vadd.s32        q15, q0,  q1
+        vadd.s32        q9,  q2,  q3
+        vld1.32         {d16, d17}, [r4, :128]!    @ second half of the bit table
+        vcgt.s32        q15, q15, q9
+
+        @ combine last part of 'joint' with d24 and save to memory
+        @ update and save scale factors to memory
+        vand.s32        q8,  q8,  q15
+        vbit.s32        q0,  q2,  q15
+        vpadd.s32       d16, d16, d17
+        vbit.s32        q1,  q3,  q15
+        vpadd.s32       d16, d16, d16
+        vst1.32         {d0, d1}, [r7, :128]
+        vadd.s32        d16, d16, d24
+        vst1.32         {d2, d3}, [r8, :128]
+        vmov.32         r0,  d16[0]
+
+        update_joint_stereo_samples
+9:
+        pop             {r3-r11}
+        bx              lr
+endfunc
+
+function ff_sbc_enc_process_input_4s_neon, export=1
+        @ parameters (the updated position is returned in r0)
+        @ r0 = position
+        @ r1 = pcm
+        @ r2 = X
+        @ r3 = nsamples
+        @ r4 = nchannels (fifth argument, fetched from the stack below)
+        @ local variables
+        @ r5 = ff_sbc_input_perm_4
+        @ r6 = src / x
+        @ r7 = dst / y
+
+        push            {r1, r3-r7}
+        ldr             r4,  [sp, #24]         @ 6 registers pushed = 24 bytes
+        movrelx         r5,  X(ff_sbc_input_perm_4)
+
+        @ handle X buffer wraparound
+        cmp             r0,  r3
+        bge             1f                     @ if (position < nsamples)
+        add             r7,  r2,  #576         @ &X[0][SBC_X_BUFFER_SIZE - 40]
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!   @ move 16 + 16 + 4 = 36 samples
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0}, [r6, :64]!
+        vst1.16         {d0}, [r7, :64]!
+        cmp             r4,  #1
+        ble             2f                     @ if (nchannels > 1)
+        add             r7,  r2,  #1232        @ &X[1][SBC_X_BUFFER_SIZE - 40]
+        add             r6,  r2,  #656
+        add             r6,  r6,  r0, lsl#1    @ &X[1][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0}, [r6, :64]!
+        vst1.16         {d0}, [r7, :64]!
+2:
+        mov             r0,  #288              @ SBC_X_BUFFER_SIZE - 40
+1:
+
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        add             r7,  r6,  #656         @ &X[1][position]
+
+        cmp             r4,  #1
+        ble             8f                     @ if (nchannels > 1)
+        tst             r1,  #1
+        beq             7f                     @ if (pcm & 1)
+        @ poor 'pcm' alignment
+        vld1.8          {d0, d1}, [r5, :128]
+1:                                             @ 8 samples per channel per pass
+        sub             r6,  r6,  #16
+        sub             r7,  r7,  #16
+        sub             r0,  r0,  #8
+        vld1.8          {d4, d5}, [r1]!
+        vuzp.16         d4,  d5                @ split the interleaved channels
+        vld1.8          {d20, d21}, [r1]!
+        vuzp.16         d20, d21
+        vswp            d5,  d20               @ d4-d5 = first channel, d20-d21 = second
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vtbl.8          d18, {d20, d21}, d0
+        vtbl.8          d19, {d20, d21}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        vst1.16         {d18, d19}, [r7, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+        b               9f
+7:
+        @ proper 'pcm' alignment
+        vld1.8          {d0, d1}, [r5, :128]
+1:                                             @ 8 samples per channel per pass
+        sub             r6,  r6,  #16
+        sub             r7,  r7,  #16
+        sub             r0,  r0,  #8
+        vld2.16         {d4, d5}, [r1]!        @ vld2 splits the channels while loading
+        vld2.16         {d20, d21}, [r1]!
+        vswp            d5,  d20
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vtbl.8          d18, {d20, d21}, d0
+        vtbl.8          d19, {d20, d21}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        vst1.16         {d18, d19}, [r7, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+        b               9f
+8:
+        @ mono
+        vld1.8          {d0, d1}, [r5, :128]
+1:                                             @ 8 samples per pass
+        sub             r6,  r6,  #16
+        sub             r0,  r0,  #8
+        vld1.8          {d4, d5}, [r1]!
+        vtbl.8          d16, {d4, d5}, d0
+        vtbl.8          d17, {d4, d5}, d1
+        vst1.16         {d16, d17}, [r6, :128]
+        subs            r3,  r3,  #8
+        bgt             1b
+9:
+        pop             {r1, r3-r7}
+        bx              lr
+endfunc
+
+function ff_sbc_enc_process_input_8s_neon, export=1
+        @ parameters (the updated position is returned in r0)
+        @ r0 = position
+        @ r1 = pcm
+        @ r2 = X
+        @ r3 = nsamples
+        @ r4 = nchannels (fifth argument, fetched from the stack below)
+        @ local variables
+        @ r5 = ff_sbc_input_perm_8
+        @ r6 = src
+        @ r7 = dst
+
+        push            {r1, r3-r7}
+        ldr             r4,  [sp, #24]         @ 6 registers pushed = 24 bytes
+        movrelx         r5,  X(ff_sbc_input_perm_8)
+
+        @ handle X buffer wraparound
+        cmp             r0,  r3
+        bge             1f                     @ if (position < nsamples)
+        add             r7,  r2,  #512         @ &X[0][SBC_X_BUFFER_SIZE - 72]
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!   @ move 4 * 16 + 8 = 72 samples
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1}, [r6, :128]!
+        vst1.16         {d0, d1}, [r7, :128]!
+        cmp             r4,  #1
+        ble             2f                     @ if (nchannels > 1)
+        add             r7,  r2,  #1168        @ &X[1][SBC_X_BUFFER_SIZE - 72]
+        add             r6,  r2,  #656
+        add             r6,  r6,  r0, lsl#1    @ &X[1][position]
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1, d2, d3}, [r6, :128]!
+        vst1.16         {d0, d1, d2, d3}, [r7, :128]!
+        vld1.16         {d0, d1}, [r6, :128]!
+        vst1.16         {d0, d1}, [r7, :128]!
+2:
+        mov             r0,  #256              @ SBC_X_BUFFER_SIZE - 72
+1:
+
+        add             r6,  r2,  r0, lsl#1    @ &X[0][position]
+        add             r7,  r6,  #656         @ &X[1][position]
+
+        cmp             r4,  #1
+        ble             8f                     @ if (nchannels > 1)
+        tst             r1,  #1
+        beq             7f                     @ if (pcm & 1)
+        @ poor 'pcm' alignment
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]
+1:                                             @ 16 samples per channel per pass
+        sub             r6,  r6,  #32
+        sub             r7,  r7,  #32
+        sub             r0,  r0,  #16
+        vld1.8          {d4, d5, d6, d7}, [r1]!
+        vuzp.16         q2,  q3                @ split the interleaved channels
+        vld1.8          {d20, d21, d22, d23}, [r1]!
+        vuzp.16         q10, q11
+        vswp            q3,  q10               @ q2-q3 = first channel, q10-q11 = second
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        vtbl.8          d16, {d20, d21, d22, d23}, d0
+        vtbl.8          d17, {d20, d21, d22, d23}, d1
+        vtbl.8          d18, {d20, d21, d22, d23}, d2
+        vtbl.8          d19, {d20, d21, d22, d23}, d3
+        vst1.16         {d16, d17, d18, d19}, [r7, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+        b 9f
+7:
+        @ proper 'pcm' alignment
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]
+1:                                             @ 16 samples per channel per pass
+        sub             r6,  r6,  #32
+        sub             r7,  r7,  #32
+        sub             r0,  r0,  #16
+        vld2.16         {d4, d5, d6, d7}, [r1]!        @ vld2 splits the channels while loading
+        vld2.16         {d20, d21, d22, d23}, [r1]!
+        vswp            q3,  q10
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        vtbl.8          d16, {d20, d21, d22, d23}, d0
+        vtbl.8          d17, {d20, d21, d22, d23}, d1
+        vtbl.8          d18, {d20, d21, d22, d23}, d2
+        vtbl.8          d19, {d20, d21, d22, d23}, d3
+        vst1.16         {d16, d17, d18, d19}, [r7, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+        b               9f
+8:
+        @ mono
+        vld1.8          {d0, d1, d2, d3}, [r5, :128]
+1:                                             @ 16 samples per pass
+        sub             r6,  r6,  #32
+        sub             r0,  r0,  #16
+        vld1.8          {d4, d5, d6, d7}, [r1]!
+        vtbl.8          d16, {d4, d5, d6, d7}, d0
+        vtbl.8          d17, {d4, d5, d6, d7}, d1
+        vtbl.8          d18, {d4, d5, d6, d7}, d2
+        vtbl.8          d19, {d4, d5, d6, d7}, d3
+        vst1.16         {d16, d17, d18, d19}, [r6, :128]
+        subs            r3,  r3,  #16
+        bgt             1b
+9:
+        pop             {r1, r3-r7}
+        bx              lr
+endfunc
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index c4134424f0..2d541bf64a 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -632,6 +632,8 @@  enum AVCodecID {
     AV_CODEC_ID_ATRAC3AL,
     AV_CODEC_ID_ATRAC3PAL,
     AV_CODEC_ID_DOLBY_E,
+    AV_CODEC_ID_SBC,
+    AV_CODEC_ID_MSBC,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 92bf1d2681..8d613507e0 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -2859,6 +2859,20 @@  static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_SBC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sbc",
+        .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_MSBC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "msbc",
+        .long_name = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/sbc.c b/libavcodec/sbc.c
new file mode 100644
index 0000000000..99d02cc56a
--- /dev/null
+++ b/libavcodec/sbc.c
@@ -0,0 +1,316 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC common functions for the encoder and decoder
+ */
+
+#include "avcodec.h"
+#include "sbc.h"
+
+/*
+ * Byte-wise lookup table for the SBC CRC-8 (polynomial 0x1D):
+ * crc_table[crc ^ byte] is the CRC after consuming one whole byte.
+ */
+static const uint8_t crc_table[256] = {
+    0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53,
+    0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB,
+    0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E,
+    0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76,
+    0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4,
+    0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C,
+    0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19,
+    0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1,
+    0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40,
+    0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8,
+    0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D,
+    0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65,
+    0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7,
+    0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F,
+    0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A,
+    0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2,
+    0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75,
+    0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D,
+    0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8,
+    0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50,
+    0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2,
+    0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A,
+    0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F,
+    0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7,
+    0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66,
+    0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E,
+    0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB,
+    0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43,
+    0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1,
+    0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09,
+    0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C,
+    0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4
+};
+
+/*
+ * Calculates the CRC-8 of the first len bits in data.
+ *
+ * data: buffer holding at least (len + 7) / 8 bytes
+ * len:  number of bits to hash, counted from the most significant bit of
+ *       data[0]
+ *
+ * Returns the CRC, starting from the initial value 0x0f. Whole bytes go
+ * through crc_table; a trailing partial byte is processed bit by bit,
+ * most significant bit first.
+ */
+uint8_t ff_sbc_crc8(const uint8_t *data, size_t len)
+{
+    uint8_t crc = 0x0f;
+    size_t i;
+    uint8_t octet;
+
+    for (i = 0; i < len / 8; i++)
+        crc = crc_table[crc ^ data[i]];
+
+    /* Without a trailing partial byte, data[i] lies past the hashed data
+     * (and possibly past the buffer), so it must not be read. */
+    if (len % 8 == 0)
+        return crc;
+
+    octet = data[i];
+    for (i = 0; i < len % 8; i++) {
+        int bit = ((octet ^ crc) & 0x80) >> 7;
+
+        crc = ((crc & 0x7f) << 1) ^ (bit ? 0x1d : 0);
+
+        octet = octet << 1;
+    }
+
+    return crc;
+}
+
+/*
+ * A2DP specification: Appendix B, page 69
+ * Offsets for 4-subband frames, indexed [frequency code][subband]; they are
+ * subtracted from the scale factor when the allocation method is not SNR.
+ */
+static const int sbc_offset4[4][4] = {
+    { -1, 0, 0, 0 },
+    { -2, 0, 0, 1 },
+    { -2, 0, 0, 1 },
+    { -2, 0, 0, 1 }
+};
+
+/*
+ * A2DP specification: Appendix B, page 69
+ * Offsets for 8-subband frames, indexed [frequency code][subband]; they are
+ * subtracted from the scale factor when the allocation method is not SNR.
+ */
+static const int sbc_offset8[4][8] = {
+    { -2, 0, 0, 0, 0, 0, 0, 1 },
+    { -3, 0, 0, 0, 0, 0, 1, 2 },
+    { -4, 0, 0, 0, 0, 0, 1, 2 },
+    { -4, 0, 0, 0, 0, 0, 1, 2 }
+};
+
+/*
+ * Code straight from the spec to calculate the bits array
+ * Takes a pointer to the frame in question, a pointer to the bits array to
+ * fill (bits[ch][sb]) and the number of subbands to process. The sampling
+ * frequency code (2 bit integer) is read from frame->frequency.
+ *
+ * MONO and DUAL_CHANNEL frames run the allocation once per channel, each
+ * against frame->bitpool; STEREO and JOINT_STEREO frames run it once over
+ * both channels, sharing a single frame->bitpool.
+ */
+static av_always_inline void sbc_calculate_bits_internal(
+        const struct sbc_frame *frame, int (*bits)[8], int subbands)
+{
+    uint8_t sf = frame->frequency;
+
+    if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) {
+        int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
+        int ch, sb;
+
+        for (ch = 0; ch < frame->channels; ch++) {
+            /* derive the bit need of every subband and track the maximum */
+            max_bitneed = 0;
+            if (frame->allocation == SNR) {
+                for (sb = 0; sb < subbands; sb++) {
+                    bitneed[ch][sb] = frame->scale_factor[ch][sb];
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            } else {
+                for (sb = 0; sb < subbands; sb++) {
+                    if (frame->scale_factor[ch][sb] == 0)
+                        bitneed[ch][sb] = -5;
+                    else {
+                        if (subbands == 4)
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
+                        else
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
+                        if (loudness > 0)
+                            bitneed[ch][sb] = loudness / 2;
+                        else
+                            bitneed[ch][sb] = loudness;
+                    }
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+
+            /* lower bitslice one step at a time, accumulating the bits each
+             * step costs, until the next step would reach the bitpool */
+            bitcount = 0;
+            slicecount = 0;
+            bitslice = max_bitneed + 1;
+            do {
+                bitslice--;
+                bitcount += slicecount;
+                slicecount = 0;
+                for (sb = 0; sb < subbands; sb++) {
+                    if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
+                        slicecount++;
+                    else if (bitneed[ch][sb] == bitslice + 1)
+                        slicecount += 2;
+                }
+            } while (bitcount + slicecount < frame->bitpool);
+
+            /* the last step fits the bitpool exactly: take it as well */
+            if (bitcount + slicecount == frame->bitpool) {
+                bitcount += slicecount;
+                bitslice--;
+            }
+
+            /* assign bits relative to bitslice, capped at 16 per subband */
+            for (sb = 0; sb < subbands; sb++) {
+                if (bitneed[ch][sb] < bitslice + 2)
+                    bits[ch][sb] = 0;
+                else {
+                    bits[ch][sb] = bitneed[ch][sb] - bitslice;
+                    if (bits[ch][sb] > 16)
+                        bits[ch][sb] = 16;
+                }
+            }
+
+            /* first pass over the leftover bits, in subband order: grow
+             * subbands that already have bits, or start one with 2 bits */
+            for (sb = 0; bitcount < frame->bitpool &&
+                            sb < subbands; sb++) {
+                if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
+                    bits[ch][sb]++;
+                    bitcount++;
+                } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
+                    bits[ch][sb] = 2;
+                    bitcount += 2;
+                }
+            }
+
+            /* second pass: one more bit for any subband below 16 bits */
+            for (sb = 0; bitcount < frame->bitpool &&
+                            sb < subbands; sb++) {
+                if (bits[ch][sb] < 16) {
+                    bits[ch][sb]++;
+                    bitcount++;
+                }
+            }
+
+        }
+
+    } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) {
+        int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
+        int ch, sb;
+
+        /* same steps as above, but max_bitneed, bitslice and bitcount are
+         * shared by both channels */
+        max_bitneed = 0;
+        if (frame->allocation == SNR) {
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    bitneed[ch][sb] = frame->scale_factor[ch][sb];
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+        } else {
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    if (frame->scale_factor[ch][sb] == 0)
+                        bitneed[ch][sb] = -5;
+                    else {
+                        if (subbands == 4)
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
+                        else
+                            loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
+                        if (loudness > 0)
+                            bitneed[ch][sb] = loudness / 2;
+                        else
+                            bitneed[ch][sb] = loudness;
+                    }
+                    if (bitneed[ch][sb] > max_bitneed)
+                        max_bitneed = bitneed[ch][sb];
+                }
+            }
+        }
+
+        bitcount = 0;
+        slicecount = 0;
+        bitslice = max_bitneed + 1;
+        do {
+            bitslice--;
+            bitcount += slicecount;
+            slicecount = 0;
+            for (ch = 0; ch < 2; ch++) {
+                for (sb = 0; sb < subbands; sb++) {
+                    if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
+                        slicecount++;
+                    else if (bitneed[ch][sb] == bitslice + 1)
+                        slicecount += 2;
+                }
+            }
+        } while (bitcount + slicecount < frame->bitpool);
+
+        if (bitcount + slicecount == frame->bitpool) {
+            bitcount += slicecount;
+            bitslice--;
+        }
+
+        for (ch = 0; ch < 2; ch++) {
+            for (sb = 0; sb < subbands; sb++) {
+                if (bitneed[ch][sb] < bitslice + 2) {
+                    bits[ch][sb] = 0;
+                } else {
+                    bits[ch][sb] = bitneed[ch][sb] - bitslice;
+                    if (bits[ch][sb] > 16)
+                        bits[ch][sb] = 16;
+                }
+            }
+        }
+
+        /* leftover bits are handed out alternating between the channels:
+         * channel 0 then channel 1 of a subband, then the next subband */
+        ch = 0;
+        sb = 0;
+        while (bitcount < frame->bitpool) {
+            if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
+                bits[ch][sb]++;
+                bitcount++;
+            } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
+                bits[ch][sb] = 2;
+                bitcount += 2;
+            }
+            if (ch == 1) {
+                ch = 0;
+                sb++;
+                if (sb >= subbands)
+                    break;
+            } else
+                ch = 1;
+        }
+
+        ch = 0;
+        sb = 0;
+        while (bitcount < frame->bitpool) {
+            if (bits[ch][sb] < 16) {
+                bits[ch][sb]++;
+                bitcount++;
+            }
+            if (ch == 1) {
+                ch = 0;
+                sb++;
+                if (sb >= subbands)
+                    break;
+            } else
+                ch = 1;
+        }
+
+    }
+
+}
+
+/*
+ * Fill bits[ch][sb] with the number of bits allocated to each subband of
+ * frame. Any subband count other than 4 is handled as 8 subbands.
+ */
+void ff_sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
+{
+    /* NOTE(review): the subband count is passed as a literal at each call
+     * site, presumably so the always-inline worker gets specialized per
+     * count - keep the two separate calls. */
+    if (frame->subbands != 4) {
+        sbc_calculate_bits_internal(frame, bits, 8);
+        return;
+    }
+
+    sbc_calculate_bits_internal(frame, bits, 4);
+}
diff --git a/libavcodec/sbc.h b/libavcodec/sbc.h
new file mode 100644
index 0000000000..169e38f4c1
--- /dev/null
+++ b/libavcodec/sbc.h
@@ -0,0 +1,121 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2014  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC common definitions for the encoder and decoder
+ */
+
+#ifndef AVCODEC_SBC_H
+#define AVCODEC_SBC_H
+
+#include "avcodec.h"
+
+/* number of blocks in every mSBC frame */
+#define MSBC_BLOCKS 15
+
+/* sampling frequency (2-bit code, kept in sbc_frame.frequency) */
+#define SBC_FREQ_16000  0x00
+#define SBC_FREQ_32000  0x01
+#define SBC_FREQ_44100  0x02
+#define SBC_FREQ_48000  0x03
+
+/* blocks (2-bit code, kept in sbc_frame.block_mode) */
+#define SBC_BLK_4       0x00
+#define SBC_BLK_8       0x01
+#define SBC_BLK_12      0x02
+#define SBC_BLK_16      0x03
+
+/* channel mode (2-bit code, kept in sbc_frame.mode) */
+#define SBC_MODE_MONO         0x00
+#define SBC_MODE_DUAL_CHANNEL 0x01
+#define SBC_MODE_STEREO       0x02
+#define SBC_MODE_JOINT_STEREO 0x03
+
+/* allocation method (1-bit code, kept in sbc_frame.allocation) */
+#define SBC_AM_LOUDNESS 0x00
+#define SBC_AM_SNR      0x01
+
+/* subbands (1-bit code, kept in sbc_frame.subband_mode) */
+#define SBC_SB_4        0x00
+#define SBC_SB_8        0x01
+
+/* synchronisation words: value of the first byte of an SBC / mSBC frame */
+#define SBC_SYNCWORD   0x9C
+#define MSBC_SYNCWORD  0xAD
+
+/* extra bits of precision for the synthesis filter input data */
+#define SBCDEC_FIXED_EXTRA_BITS 2
+
+/*
+ * Enforce 16 byte alignment for the data, which is supposed to be used
+ * with SIMD optimized code.
+ */
+#define SBC_ALIGN 16
+
+/* This structure contains an unpacked SBC frame.
+   Yes, there is probably quite some unused space herein */
+struct sbc_frame {
+    uint8_t frequency;      /* sampling frequency code, one of SBC_FREQ_* */
+    uint8_t block_mode;     /* block count code, one of SBC_BLK_* */
+    uint8_t blocks;         /* number of blocks in the frame */
+    enum {
+        MONO         = SBC_MODE_MONO,
+        DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL,
+        STEREO       = SBC_MODE_STEREO,
+        JOINT_STEREO = SBC_MODE_JOINT_STEREO
+    } mode;                 /* channel mode */
+    uint8_t channels;       /* number of channels, 1 or 2 */
+    enum {
+        LOUDNESS = SBC_AM_LOUDNESS,
+        SNR      = SBC_AM_SNR
+    } allocation;           /* bit allocation method */
+    uint8_t subband_mode;   /* subband count code, one of SBC_SB_* */
+    uint8_t subbands;       /* number of subbands, 4 or 8 */
+    uint8_t bitpool;        /* bit budget used by ff_sbc_calculate_bits() */
+    uint16_t codesize;      /* NOTE(review): not referenced by the common or decoder code - confirm use */
+    uint16_t length;        /* length in bytes of the packed frame; unsigned,
+                               so it cannot represent a negative error code */
+
+    /* bit number x set means joint stereo has been used in subband x */
+    uint8_t joint;
+
+    /* only the lower 4 bits of every element are to be used */
+    DECLARE_ALIGNED(SBC_ALIGN, uint32_t, scale_factor)[2][8];
+
+    /* raw integer subband samples in the frame, indexed [block][channel][subband] */
+    DECLARE_ALIGNED(SBC_ALIGN, int32_t, sb_sample_f)[16][2][8];
+
+    /* modified subband samples, indexed [block][channel][subband] */
+    DECLARE_ALIGNED(SBC_ALIGN, int32_t, sb_sample)[16][2][8];
+
+    /* original pcm audio samples, indexed [channel][sample] */
+    DECLARE_ALIGNED(SBC_ALIGN, int16_t, pcm_sample)[2][16*8];
+};
+
+uint8_t ff_sbc_crc8(const uint8_t *data, size_t len);
+void ff_sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]);
+
+#endif /* AVCODEC_SBC_H */
diff --git a/libavcodec/sbcdec.c b/libavcodec/sbcdec.c
new file mode 100644
index 0000000000..f2a40ad117
--- /dev/null
+++ b/libavcodec/sbcdec.c
@@ -0,0 +1,469 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder implementation
+ */
+
+#include <stdbool.h>
+#include "avcodec.h"
+#include "internal.h"
+#include "sbc.h"
+#include "sbcdec_data.h"
+
+/* Synthesis filter state of the decoder, one set per channel. It is set up
+ * once by sbc_decoder_init() and then updated by every synthesized block. */
+struct sbc_decoder_state {
+    int32_t V[2][170];  /* per-channel filter history, addressed through offset[] */
+    int offset[2][16];  /* per-channel positions into V; the 4-subband filter uses the first 8 */
+};
+
+/* Private context shared by the SBC and mSBC decoders. */
+typedef struct SBCDecContext {
+    AVClass *class;
+    /* frame unpacked from the packet being decoded */
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+    /* synthesis filter state */
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_decoder_state, dsp);
+    /* frame parser chosen at init time: sbc_unpack_frame or
+     * msbc_unpack_frame; returns the packed frame length in bytes or a
+     * negative value on error */
+    int (*unpack_frame)(const uint8_t *data, struct sbc_frame *frame,
+            size_t len);
+} SBCDecContext;
+
+/*
+ * Unpacks a SBC frame at the beginning of the stream in data,
+ * which has at most len bytes into frame.
+ *
+ * The caller has already validated the first bytes of the header and
+ * filled in the frame parameters (mode, channels, subbands, blocks, bitpool,
+ * allocation, frequency); this function reads the joint stereo flags, the
+ * scale factors and the audio samples, and verifies the CRC.
+ *
+ * Returns the length in bytes of the packed frame, or a negative
+ * value on error. The error codes used here are:
+ *
+ *  -1   Data stream too short
+ *  -3   CRC8 incorrect
+ *
+ * (-2, sync byte incorrect, and -4, bitpool value out of bounds, are
+ * returned by the callers.)
+ */
+static int sbc_unpack_frame_internal(const uint8_t *data,
+                                     struct sbc_frame *frame, size_t len)
+{
+    unsigned int consumed;
+    /* Will copy the parts of the header that are relevant to crc
+     * calculation here */
+    uint8_t crc_header[11] = { 0 };
+    int crc_pos = 0;
+    int32_t temp;
+
+    uint32_t audio_sample;
+    int ch, sb, blk, bit;   /* channel, subband, block and bit standard
+                               counters */
+    int bits[2][8];         /* bits distribution */
+    uint32_t levels[2][8];  /* levels derived from that */
+
+    /* the 4 header bytes have been consumed by the caller */
+    consumed = 32;
+
+    crc_header[0] = data[1];
+    crc_header[1] = data[2];
+    crc_pos = 16;
+
+    if (frame->mode == JOINT_STEREO) {
+        if (len * 8 < consumed + frame->subbands)
+            return -1;
+
+        frame->joint = 0x00;
+        for (sb = 0; sb < frame->subbands - 1; sb++)
+            frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb;
+        if (frame->subbands == 4)
+            crc_header[crc_pos / 8] = data[4] & 0xf0;
+        else
+            crc_header[crc_pos / 8] = data[4];
+
+        consumed += frame->subbands;
+        crc_pos += frame->subbands;
+    }
+
+    if (len * 8 < consumed + (4 * frame->subbands * frame->channels))
+        return -1;
+
+    /* scale factors are 4 bits each and are covered by the CRC */
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++) {
+            /* FIXME assert(consumed % 4 == 0); */
+            frame->scale_factor[ch][sb] =
+                (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F;
+            crc_header[crc_pos >> 3] |=
+                frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7));
+
+            consumed += 4;
+            crc_pos += 4;
+        }
+    }
+
+    if (data[3] != ff_sbc_crc8(crc_header, crc_pos))
+        return -3;
+
+    ff_sbc_calculate_bits(frame, bits);
+
+    for (ch = 0; ch < frame->channels; ch++) {
+        for (sb = 0; sb < frame->subbands; sb++)
+            levels[ch][sb] = (1 << bits[ch][sb]) - 1;
+    }
+
+    for (blk = 0; blk < frame->blocks; blk++) {
+        for (ch = 0; ch < frame->channels; ch++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+                uint32_t shift;
+
+                if (levels[ch][sb] == 0) {
+                    frame->sb_sample[blk][ch][sb] = 0;
+                    continue;
+                }
+
+                shift = frame->scale_factor[ch][sb] +
+                        1 + SBCDEC_FIXED_EXTRA_BITS;
+
+                audio_sample = 0;
+                for (bit = 0; bit < bits[ch][sb]; bit++) {
+                    /* bit number consumed lives in data[consumed >> 3],
+                     * which is only valid while consumed < len * 8 */
+                    if (consumed >= len * 8)
+                        return -1;
+
+                    if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01)
+                        audio_sample |= 1 << (bits[ch][sb] - bit - 1);
+
+                    consumed++;
+                }
+
+                frame->sb_sample[blk][ch][sb] = (int32_t)
+                    (((((uint64_t) audio_sample << 1) | 1) << shift) /
+                    levels[ch][sb]) - (1 << shift);
+            }
+        }
+    }
+
+    /* joint subbands carry sum/difference data: rebuild both channels */
+    if (frame->mode == JOINT_STEREO) {
+        for (blk = 0; blk < frame->blocks; blk++) {
+            for (sb = 0; sb < frame->subbands; sb++) {
+                if (frame->joint & (0x01 << sb)) {
+                    temp = frame->sb_sample[blk][0][sb] +
+                           frame->sb_sample[blk][1][sb];
+                    frame->sb_sample[blk][1][sb] =
+                        frame->sb_sample[blk][0][sb] -
+                        frame->sb_sample[blk][1][sb];
+                    frame->sb_sample[blk][0][sb] = temp;
+                }
+            }
+        }
+    }
+
+    /* round the consumed bit count up to a whole byte */
+    if ((consumed & 0x7) != 0)
+        consumed += 8 - (consumed & 0x7);
+
+    return consumed >> 3;
+}
+
+/*
+ * Parse the header of a regular SBC frame into frame, then hand over to
+ * sbc_unpack_frame_internal() for the rest of the frame.
+ * Returns that function's result, or -1 if fewer than 4 bytes are available,
+ * -2 if the sync byte is wrong, -4 if the bitpool is out of bounds.
+ */
+static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame,
+                            size_t len)
+{
+    uint8_t hdr;
+    int per_subband;
+
+    if (len < 4)
+        return -1;
+    if (data[0] != SBC_SYNCWORD)
+        return -2;
+
+    hdr = data[1];
+
+    frame->frequency    = (hdr >> 6) & 0x03;
+    frame->block_mode   = (hdr >> 4) & 0x03;
+    /* SBC_BLK_4 .. SBC_BLK_16 are the codes 0 .. 3 for 4, 8, 12, 16 blocks */
+    frame->blocks       = 4 * (frame->block_mode + 1);
+    frame->mode         = (hdr >> 2) & 0x03;
+    /* every mode except MONO carries two channels */
+    frame->channels     = frame->mode == MONO ? 1 : 2;
+    frame->allocation   = (hdr >> 1) & 0x01;
+    frame->subband_mode = hdr & 0x01;
+    frame->subbands     = frame->subband_mode ? 8 : 4;
+    frame->bitpool      = data[2];
+
+    /* upper bound: 16 bits per subband for MONO and DUAL_CHANNEL,
+     * 32 bits per subband for STEREO and JOINT_STEREO */
+    if (frame->mode == MONO || frame->mode == DUAL_CHANNEL)
+        per_subband = 16;
+    else
+        per_subband = 32;
+    if (frame->bitpool > per_subband * frame->subbands)
+        return -4;
+
+    return sbc_unpack_frame_internal(data, frame, len);
+}
+
+/*
+ * Check the fixed header of an mSBC frame, load the fixed mSBC parameters
+ * into frame, then hand over to sbc_unpack_frame_internal().
+ * Returns that function's result, or -1 if fewer than 4 bytes are available,
+ * -2 if the header bytes do not match.
+ */
+static int msbc_unpack_frame(const uint8_t *data,
+                             struct sbc_frame *frame, size_t len)
+{
+    if (len < 4)
+        return -1;
+
+    /* sync byte followed by two zero bytes */
+    if (data[0] != MSBC_SYNCWORD || data[1] != 0 || data[2] != 0)
+        return -2;
+
+    frame->frequency    = SBC_FREQ_16000;
+    frame->block_mode   = SBC_BLK_4;
+    frame->blocks       = MSBC_BLOCKS;
+    frame->allocation   = LOUDNESS;
+    frame->mode         = MONO;
+    frame->channels     = 1;
+    frame->subband_mode = 1;
+    frame->subbands     = 8;
+    frame->bitpool      = 26;
+
+    return sbc_unpack_frame_internal(data, frame, len);
+}
+
+/*
+ * Reset the synthesis filter state: clear the history buffers and set the
+ * positions of both channels to 10, 20, 30, ...
+ */
+static void sbc_decoder_init(struct sbc_decoder_state *state)
+{
+    int ch;
+
+    memset(state->V, 0, sizeof(state->V));
+
+    for (ch = 0; ch < 2; ch++) {
+        int *pos = state->offset[ch];
+        int n, start = 10;
+
+        for (n = 0; n < FF_ARRAY_ELEMS(state->offset[0]); n++, start += 10)
+            pos[n] = start;
+    }
+}
+
+/*
+ * Run the 4-subband synthesis filter for block blk of channel ch: reads
+ * frame->sb_sample[blk][ch][0..3] and writes 4 samples, clipped to 16 bits,
+ * to frame->pcm_sample[ch] starting at index blk * 4. state->V[ch] and
+ * state->offset[ch] are updated and carry the filter history to the next
+ * call.
+ */
+static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
+                                       struct sbc_frame *frame, int ch, int blk)
+{
+    int i, k, idx;
+    int32_t *v = state->V[ch];
+    int *offset = state->offset[ch];
+
+    for (i = 0; i < 8; i++) {
+        /* Shifting */
+        offset[i]--;
+        if (offset[i] < 0) {
+            /* wrap to the top of the 80-entry window; mirror the first 9
+             * entries behind it so that v[offset + 0..9] can be read
+             * without wrapping */
+            offset[i] = 79;
+            memcpy(v + 80, v, 9 * sizeof(*v));
+        }
+
+        /* Distribute the new matrix value to the shifted position */
+        v[offset[i]] =
+            ( ff_synmatrix4[i][0] * frame->sb_sample[blk][ch][0] +
+              ff_synmatrix4[i][1] * frame->sb_sample[blk][ch][1] +
+              ff_synmatrix4[i][2] * frame->sb_sample[blk][ch][2] +
+              ff_synmatrix4[i][3] * frame->sb_sample[blk][ch][3] ) >> 15;
+    }
+
+    /* Compute the samples */
+    for (idx = 0, i = 0; i < 4; i++, idx += 5) {
+        /* even taps read position i, odd taps read position i + 4 */
+        k = (i + 4) & 0xf;
+
+        /* Store in output, Q0 */
+        frame->pcm_sample[ch][blk * 4 + i] = av_clip_int16(
+            ( v[offset[i] + 0] * ff_sbc_proto_4_40m0[idx + 0] +
+              v[offset[k] + 1] * ff_sbc_proto_4_40m1[idx + 0] +
+              v[offset[i] + 2] * ff_sbc_proto_4_40m0[idx + 1] +
+              v[offset[k] + 3] * ff_sbc_proto_4_40m1[idx + 1] +
+              v[offset[i] + 4] * ff_sbc_proto_4_40m0[idx + 2] +
+              v[offset[k] + 5] * ff_sbc_proto_4_40m1[idx + 2] +
+              v[offset[i] + 6] * ff_sbc_proto_4_40m0[idx + 3] +
+              v[offset[k] + 7] * ff_sbc_proto_4_40m1[idx + 3] +
+              v[offset[i] + 8] * ff_sbc_proto_4_40m0[idx + 4] +
+              v[offset[k] + 9] * ff_sbc_proto_4_40m1[idx + 4] ) >> 15);
+    }
+}
+
+/*
+ * Run the 8-subband synthesis filter for block blk of channel ch: reads
+ * frame->sb_sample[blk][ch][0..7] and writes 8 samples, clipped to 16 bits,
+ * to frame->pcm_sample[ch] starting at index blk * 8. state->V[ch] and
+ * state->offset[ch] are updated and carry the filter history to the next
+ * call.
+ */
+static inline void sbc_synthesize_eight(struct sbc_decoder_state *state,
+                                        struct sbc_frame *frame,
+                                        int ch, int blk)
+{
+    int i, k, idx;
+    int32_t *v = state->V[ch];
+    int *offset = state->offset[ch];
+
+    for (i = 0; i < 16; i++) {
+        /* Shifting */
+        offset[i]--;
+        if (offset[i] < 0) {
+            /* wrap to the top of the 160-entry window; mirror the first 9
+             * entries behind it so that v[offset + 0..9] can be read
+             * without wrapping */
+            offset[i] = 159;
+            memcpy(v + 160, v, 9 * sizeof(*v));
+        }
+
+        /* Distribute the new matrix value to the shifted position */
+        v[offset[i]] =
+            ( ff_synmatrix8[i][0] * frame->sb_sample[blk][ch][0] +
+              ff_synmatrix8[i][1] * frame->sb_sample[blk][ch][1] +
+              ff_synmatrix8[i][2] * frame->sb_sample[blk][ch][2] +
+              ff_synmatrix8[i][3] * frame->sb_sample[blk][ch][3] +
+              ff_synmatrix8[i][4] * frame->sb_sample[blk][ch][4] +
+              ff_synmatrix8[i][5] * frame->sb_sample[blk][ch][5] +
+              ff_synmatrix8[i][6] * frame->sb_sample[blk][ch][6] +
+              ff_synmatrix8[i][7] * frame->sb_sample[blk][ch][7] ) >> 15;
+    }
+
+    /* Compute the samples */
+    for (idx = 0, i = 0; i < 8; i++, idx += 5) {
+        /* even taps read position i, odd taps read position i + 8 */
+        k = (i + 8) & 0xf;
+
+        /* Store in output, Q0 */
+        frame->pcm_sample[ch][blk * 8 + i] = av_clip_int16(
+            ( v[offset[i] + 0] * ff_sbc_proto_8_80m0[idx + 0] +
+              v[offset[k] + 1] * ff_sbc_proto_8_80m1[idx + 0] +
+              v[offset[i] + 2] * ff_sbc_proto_8_80m0[idx + 1] +
+              v[offset[k] + 3] * ff_sbc_proto_8_80m1[idx + 1] +
+              v[offset[i] + 4] * ff_sbc_proto_8_80m0[idx + 2] +
+              v[offset[k] + 5] * ff_sbc_proto_8_80m1[idx + 2] +
+              v[offset[i] + 6] * ff_sbc_proto_8_80m0[idx + 3] +
+              v[offset[k] + 7] * ff_sbc_proto_8_80m1[idx + 3] +
+              v[offset[i] + 8] * ff_sbc_proto_8_80m0[idx + 4] +
+              v[offset[k] + 9] * ff_sbc_proto_8_80m1[idx + 4] ) >> 15);
+    }
+}
+
+/*
+ * Synthesize the PCM samples of every block and channel of frame into
+ * frame->pcm_sample. Returns the number of samples produced per channel,
+ * or AVERROR(EIO) if the subband count is neither 4 nor 8.
+ */
+static int sbc_synthesize_audio(struct sbc_decoder_state *state,
+                                struct sbc_frame *frame)
+{
+    int ch, blk;
+
+    if (frame->subbands == 4) {
+        for (ch = 0; ch < frame->channels; ch++)
+            for (blk = 0; blk < frame->blocks; blk++)
+                sbc_synthesize_four(state, frame, ch, blk);
+
+        return frame->blocks * 4;
+    }
+
+    if (frame->subbands == 8) {
+        for (ch = 0; ch < frame->channels; ch++)
+            for (blk = 0; blk < frame->blocks; blk++)
+                sbc_synthesize_eight(state, frame, ch, blk);
+
+        return frame->blocks * 8;
+    }
+
+    return AVERROR(EIO);
+}
+
+/* Decoder init for SBC: reset the filter state and select the SBC frame
+ * parser. Always returns 0. */
+static int sbc_decode_init(AVCodecContext *avctx)
+{
+    SBCDecContext *ctx = avctx->priv_data;
+
+    sbc_decoder_init(&ctx->dsp);
+    ctx->unpack_frame = sbc_unpack_frame;
+
+    return 0;
+}
+
+/* Decoder init for mSBC: reset the filter state and select the mSBC frame
+ * parser. Always returns 0. */
+static int msbc_decode_init(AVCodecContext *avctx)
+{
+    SBCDecContext *ctx = avctx->priv_data;
+
+    sbc_decoder_init(&ctx->dsp);
+    ctx->unpack_frame = msbc_unpack_frame;
+
+    return 0;
+}
+
+/*
+ * Decode one packet: unpack the frame at the start of avpkt, synthesize its
+ * PCM samples and copy them, interleaved by channel, into the output frame.
+ *
+ * Returns the number of bytes consumed from the packet, or a negative
+ * AVERROR code (AVERROR_INVALIDDATA when the frame cannot be unpacked).
+ * *got_frame_ptr is set to 1 when a frame has been output.
+ */
+static int sbc_decode_frame(AVCodecContext *avctx,
+                            void *data, int *got_frame_ptr,
+                            AVPacket *avpkt)
+{
+    SBCDecContext *sbc = avctx->priv_data;
+    int i, ch, samples, ret, frame_length;
+    AVFrame *frame = data;
+    int16_t *ptr;
+
+    if (!sbc)
+        return AVERROR(EIO);
+
+    /* Keep the result in an int: sbc->frame.length is unsigned 16 bit and
+     * would turn the negative error codes into large positive lengths. */
+    frame_length = sbc->unpack_frame(avpkt->data, &sbc->frame, avpkt->size);
+    if (frame_length <= 0)
+        return AVERROR_INVALIDDATA;
+    sbc->frame.length = frame_length;
+
+    samples = sbc_synthesize_audio(&sbc->dsp, &sbc->frame);
+    if (samples < 0)
+        return samples;
+
+    /* The copy loop below writes frame.channels samples per step, so the
+     * output buffer has to be allocated for that many channels. */
+    avctx->channels = sbc->frame.channels;
+
+    frame->nb_samples = samples;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    ptr = (int16_t *)frame->data[0];
+
+    /* interleave the per-channel sample buffers */
+    for (i = 0; i < samples; i++)
+        for (ch = 0; ch < sbc->frame.channels; ch++)
+            *ptr++ = sbc->frame.pcm_sample[ch][i];
+
+    *got_frame_ptr = 1;
+
+    return frame_length;
+}
+
+#if CONFIG_SBC_DECODER
+/* SBC decoder: mono or stereo, signed 16-bit interleaved output */
+AVCodec ff_sbc_decoder = {
+    .name                  = "sbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_SBC,
+    .priv_data_size        = sizeof(SBCDecContext),
+    .init                  = sbc_decode_init,
+    .decode                = sbc_decode_frame,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                                  AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    /* the four rates named by the SBC_FREQ_* codes */
+    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 },
+};
+#endif
+
+#if CONFIG_MSBC_DECODER
+/* mSBC decoder: shares the SBC decode function, but its init selects the
+ * mSBC frame parser; mono at 16000 Hz only */
+AVCodec ff_msbc_decoder = {
+    .name                  = "msbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_MSBC,
+    .priv_data_size        = sizeof(SBCDecContext),
+    .init                  = msbc_decode_init,
+    .decode                = sbc_decode_frame,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 0 },
+};
+#endif
diff --git a/libavcodec/sbcdec_data.c b/libavcodec/sbcdec_data.c
new file mode 100644
index 0000000000..2152162207
--- /dev/null
+++ b/libavcodec/sbcdec_data.c
@@ -0,0 +1,127 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder tables
+ */
+
+#include <stdint.h>
+#include "sbcdec_data.h"
+#include "sbc.h"
+
+#define SS4(val)  ((int32_t)(val) >> 12) /* scales entries of ff_sbc_proto_4_40m0/m1 */
+#define SS8(val)  ((int32_t)(val) >> 14) /* scales entries of ff_sbc_proto_8_80m0/m1 */
+#define SN4(val)  ((int32_t)(val) >> (11 + 1 + SBCDEC_FIXED_EXTRA_BITS)) /* ff_synmatrix4 entries */
+#define SN8(val)  ((int32_t)(val) >> (11 + 1 + SBCDEC_FIXED_EXTRA_BITS)) /* ff_synmatrix8 entries */
+
+const int32_t ff_sbc_proto_4_40m0[] = { /* 20 entries, each hex constant scaled by SS4() */
+    SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8),
+    SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8),
+    SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330),
+    SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00),
+    SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7)
+};
+
+const int32_t ff_sbc_proto_4_40m1[] = { /* 20 entries, each hex constant scaled by SS4() */
+    SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475),
+    SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370),
+    SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b),
+    SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b),
+    SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7)
+};
+
+const int32_t ff_sbc_proto_8_80m0[] = { /* 40 entries, each hex constant scaled by SS8() */
+    SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100),
+    SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0),
+    SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c),
+    SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705),
+    SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db),
+    SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948),
+    SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200),
+    SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358),
+    SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31),
+    SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170)
+};
+
+const int32_t ff_sbc_proto_8_80m1[] = { /* 40 entries, each hex constant scaled by SS8() */
+    SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620),
+    SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40),
+    SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01),
+    SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94),
+    SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b),
+    SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68),
+    SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20),
+    SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410),
+    SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da),
+    SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a)
+};
+
+const int32_t ff_synmatrix4[8][4] = { /* 8 rows of 4 entries, each scaled by SN4() */
+    { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) },
+    { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) },
+    { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) },
+    { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) },
+    { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) },
+    { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) },
+    { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) },
+    { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) } /* same as row 5 */
+};
+
+const int32_t ff_synmatrix8[16][8] = { /* 16 rows of 8 entries, each scaled by SN8() */
+    { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798),
+      SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) },
+    { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), SN8(0x06a6d988),
+      SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) },
+    { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac),
+      SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) },
+    { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10),
+      SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) },
+    { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000),
+      SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) },
+    { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0),
+      SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) },
+    { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54),
+      SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) },
+    { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678),
+      SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) },
+    { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868),
+      SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) },
+    { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
+      SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) },
+    { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0),
+      SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) },
+    { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c),
+      SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) },
+    { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000),
+      SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) },
+    { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c),
+      SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) },
+    { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0),
+      SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) },
+    { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
+      SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }
+};
diff --git a/libavcodec/sbcdec_data.h b/libavcodec/sbcdec_data.h
new file mode 100644
index 0000000000..1b79d1de23
--- /dev/null
+++ b/libavcodec/sbcdec_data.h
@@ -0,0 +1,44 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC decoder tables
+ */
+
+#ifndef AVCODEC_SBCDEC_DATA_H
+#define AVCODEC_SBCDEC_DATA_H
+
+#include <stdint.h>
+
+extern const int32_t ff_sbc_proto_4_40m0[]; /* defined in sbcdec_data.c, 20 entries */
+extern const int32_t ff_sbc_proto_4_40m1[]; /* defined in sbcdec_data.c, 20 entries */
+extern const int32_t ff_sbc_proto_8_80m0[]; /* defined in sbcdec_data.c, 40 entries */
+extern const int32_t ff_sbc_proto_8_80m1[]; /* defined in sbcdec_data.c, 40 entries */
+extern const int32_t ff_synmatrix4[8][4];   /* defined in sbcdec_data.c */
+extern const int32_t ff_synmatrix8[16][8];  /* defined in sbcdec_data.c */
+
+#endif /* AVCODEC_SBCDEC_DATA_H */
diff --git a/libavcodec/sbcdsp.c b/libavcodec/sbcdsp.c
new file mode 100644
index 0000000000..0cdf5ef5aa
--- /dev/null
+++ b/libavcodec/sbcdsp.c
@@ -0,0 +1,569 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/intmath.h"
+#include "libavutil/intreadwrite.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+#include "sbcdsp_data.h"
+
+/*
+ * Reference C implementation of the analysis filter with SIMD-friendly table
+ * reordering and code layout. This code can be used to develop platform
+ * specific SIMD optimizations. It may also be used as a kind of test
+ * of compiler autovectorization capabilities (who knows, if the compiler
+ * is very good at this stuff, hand-optimized assembly may not be strictly
+ * needed for some platforms).
+ *
+ * Note: It is also possible to make a simple variant of analysis filter,
+ * which needs only a single constants table without taking care about
+ * even/odd cases. This simple variant of filter can be implemented without
+ * input data permutation. The only thing that would be lost is the
+ * possibility to use pairwise SIMD multiplications. But for some simple
+ * CPU cores without SIMD extensions it can be useful. If anybody is
+ * interested in implementing such variant of a filter, sourcecode from
+ * bluez versions 4.26/4.27 can be used as a reference and the history of
+ * the changes in git repository done around that time may be worth checking.
+ */
+
+static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{   /* reads in[0..39] and consts[0..55]; writes four results to out[0..3] */
+    int32_t t1[4];   /* 32-bit accumulators, reused by both stages */
+    int16_t t2[4];   /* first-stage results narrowed to 16 bits */
+    int hop = 0;
+
+    /* rounding coefficient: half of the unit removed by the ">> scale" below */
+    t1[0] = t1[1] = t1[2] = t1[3] =
+        (int32_t) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
+
+    /* low pass polyphase filter: 5 hops of 8 taps, two products per accumulator */
+    for (hop = 0; hop < 40; hop += 8) {
+        t1[0] += (int32_t) in[hop] * consts[hop];
+        t1[0] += (int32_t) in[hop + 1] * consts[hop + 1];
+        t1[1] += (int32_t) in[hop + 2] * consts[hop + 2];
+        t1[1] += (int32_t) in[hop + 3] * consts[hop + 3];
+        t1[2] += (int32_t) in[hop + 4] * consts[hop + 4];
+        t1[2] += (int32_t) in[hop + 5] * consts[hop + 5];
+        t1[3] += (int32_t) in[hop + 6] * consts[hop + 6];
+        t1[3] += (int32_t) in[hop + 7] * consts[hop + 7];
+    }
+
+    /* scaling: shift out SBC_PROTO_FIXED4_SCALE bits and store as int16_t */
+    t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE;
+    t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE;
+    t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE;
+    t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE;
+
+    /* do the cos transform: consts[40..47] pair with t2[0], t2[1] */
+    t1[0]  = (int32_t) t2[0] * consts[40 + 0];
+    t1[0] += (int32_t) t2[1] * consts[40 + 1];
+    t1[1]  = (int32_t) t2[0] * consts[40 + 2];
+    t1[1] += (int32_t) t2[1] * consts[40 + 3];
+    t1[2]  = (int32_t) t2[0] * consts[40 + 4];
+    t1[2] += (int32_t) t2[1] * consts[40 + 5];
+    t1[3]  = (int32_t) t2[0] * consts[40 + 6];
+    t1[3] += (int32_t) t2[1] * consts[40 + 7];
+
+    t1[0] += (int32_t) t2[2] * consts[40 + 8];   /* consts[48..55] pair with t2[2], t2[3] */
+    t1[0] += (int32_t) t2[3] * consts[40 + 9];
+    t1[1] += (int32_t) t2[2] * consts[40 + 10];
+    t1[1] += (int32_t) t2[3] * consts[40 + 11];
+    t1[2] += (int32_t) t2[2] * consts[40 + 12];
+    t1[2] += (int32_t) t2[3] * consts[40 + 13];
+    t1[3] += (int32_t) t2[2] * consts[40 + 14];
+    t1[3] += (int32_t) t2[3] * consts[40 + 15];
+
+    out[0] = t1[0] >>
+        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); /* shift is SCALE_OUT_BITS short of the table scale */
+    out[1] = t1[1] >>
+        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+    out[2] = t1[2] >>
+        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+    out[3] = t1[3] >>
+        (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+}
+
+static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
+                               const int16_t *consts)
+{   /* reads in[0..79] and consts[0..143]; writes eight results to out[0..7] */
+    int32_t t1[8];   /* 32-bit accumulators, reused by both stages */
+    int16_t t2[8];   /* first-stage results narrowed to 16 bits */
+    int i, hop;
+
+    /* rounding coefficient: half of the unit removed by the ">> scale" below */
+    t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] =
+        (int32_t) 1 << (SBC_PROTO_FIXED8_SCALE-1);
+
+    /* low pass polyphase filter: 5 hops of 16 taps, two products per accumulator */
+    for (hop = 0; hop < 80; hop += 16) {
+        t1[0] += (int32_t) in[hop] * consts[hop];
+        t1[0] += (int32_t) in[hop + 1] * consts[hop + 1];
+        t1[1] += (int32_t) in[hop + 2] * consts[hop + 2];
+        t1[1] += (int32_t) in[hop + 3] * consts[hop + 3];
+        t1[2] += (int32_t) in[hop + 4] * consts[hop + 4];
+        t1[2] += (int32_t) in[hop + 5] * consts[hop + 5];
+        t1[3] += (int32_t) in[hop + 6] * consts[hop + 6];
+        t1[3] += (int32_t) in[hop + 7] * consts[hop + 7];
+        t1[4] += (int32_t) in[hop + 8] * consts[hop + 8];
+        t1[4] += (int32_t) in[hop + 9] * consts[hop + 9];
+        t1[5] += (int32_t) in[hop + 10] * consts[hop + 10];
+        t1[5] += (int32_t) in[hop + 11] * consts[hop + 11];
+        t1[6] += (int32_t) in[hop + 12] * consts[hop + 12];
+        t1[6] += (int32_t) in[hop + 13] * consts[hop + 13];
+        t1[7] += (int32_t) in[hop + 14] * consts[hop + 14];
+        t1[7] += (int32_t) in[hop + 15] * consts[hop + 15];
+    }
+
+    /* scaling: shift out SBC_PROTO_FIXED8_SCALE bits and store as int16_t */
+    t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE;
+    t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE;
+    t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE;
+    t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE;
+    t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE;
+    t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE;
+    t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE;
+    t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE;
+
+
+    /* do the cos transform: accumulators restart from zero (no rounding term) */
+    t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0;
+
+    for (i = 0; i < 4; i++) { /* each pass pairs t2[2i], t2[2i+1] with 16 constants */
+        t1[0] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 0];
+        t1[0] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 1];
+        t1[1] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 2];
+        t1[1] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 3];
+        t1[2] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 4];
+        t1[2] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 5];
+        t1[3] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 6];
+        t1[3] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 7];
+        t1[4] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 8];
+        t1[4] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 9];
+        t1[5] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 10];
+        t1[5] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 11];
+        t1[6] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 12];
+        t1[6] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 13];
+        t1[7] += (int32_t) t2[i * 2 + 0] * consts[80 + i * 16 + 14];
+        t1[7] += (int32_t) t2[i * 2 + 1] * consts[80 + i * 16 + 15];
+    }
+
+    for (i = 0; i < 8; i++)
+        out[i] = t1[i] >>
+            (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); /* shift is SCALE_OUT_BITS short of the table scale */
+}
+
+static inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{   /* four sbc_analyze_4 calls; each result lands out_stride entries after the previous one */
+    /* Analyze blocks: input offset steps down 12, 8, 4, 0 while the table alternates odd/even */
+    s->sbc_analyze_4(x + 12, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_4(x + 8, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
+    out += out_stride;
+    s->sbc_analyze_4(x + 4, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_4(x + 0, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
+
+    emms_c(); /* NOTE(review): presumably resets x86 MMX state after a SIMD sbc_analyze_4 - confirm */
+}
+
+static inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
+                                          int16_t *x, int32_t *out, int out_stride)
+{   /* four sbc_analyze_8 calls; each result lands out_stride entries after the previous one */
+    /* Analyze blocks: input offset steps down 24, 16, 8, 0 while the table alternates odd/even */
+    s->sbc_analyze_8(x + 24, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_8(x + 16, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
+    out += out_stride;
+    s->sbc_analyze_8(x + 8, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
+    out += out_stride;
+    s->sbc_analyze_8(x + 0, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
+
+    emms_c(); /* NOTE(review): presumably resets x86 MMX state after a SIMD sbc_analyze_8 - confirm */
+}
+
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride); /* forward declaration: odd and even install each other */
+
+static inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
+                                              int16_t *x, int32_t *out,
+                                              int out_stride) /* out_stride is not used */
+{
+    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd); /* one block, odd table */
+    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_even; /* the next call through the pointer uses the even table */
+
+    emms_c();
+}
+
+static inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
+                                               int16_t *x, int32_t *out,
+                                               int out_stride) /* out_stride is not used */
+{
+    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_even); /* one block, even table */
+    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd; /* the next call through the pointer uses the odd table */
+
+    emms_c();
+}
+
+#define PCM(i)  AV_RN16(pcm + 2*(i)) /* 16-bit value at byte offset 2*i of the local "pcm" pointer */
+
+/*
+ * Internal helper functions for input data processing. In order to get
+ * optimal performance, it is important to have "nsamples" and "nchannels"
+ * arguments used with this inline function as compile time constants.
+ */
+
+static av_always_inline int sbc_encoder_process_input_s4_internal(
+    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+    int nsamples, int nchannels)
+{   /* stores reordered samples below "position" in X; returns the new position */
+    /* handle X buffer wraparound: fewer than nsamples slots remain below position */
+    if (position < nsamples) {
+        if (nchannels > 0)
+            memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position],
+                            36 * sizeof(int16_t));
+        if (nchannels > 1)
+            memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position],
+                            36 * sizeof(int16_t));
+        position = SBC_X_BUFFER_SIZE - 40; /* continue filling downwards from the moved copy */
+    }
+
+    /* copy/permute audio samples, 8 per channel per iteration */
+    while ((nsamples -= 8) >= 0) {
+        position -= 8;
+        if (nchannels > 0) {
+            int16_t *x = &X[0][position];
+            x[0]  = PCM(0 + 7 * nchannels); /* PCM index = channel + sample * nchannels */
+            x[1]  = PCM(0 + 3 * nchannels);
+            x[2]  = PCM(0 + 6 * nchannels);
+            x[3]  = PCM(0 + 4 * nchannels);
+            x[4]  = PCM(0 + 0 * nchannels);
+            x[5]  = PCM(0 + 2 * nchannels);
+            x[6]  = PCM(0 + 1 * nchannels);
+            x[7]  = PCM(0 + 5 * nchannels);
+        }
+        if (nchannels > 1) {
+            int16_t *x = &X[1][position];
+            x[0]  = PCM(1 + 7 * nchannels);
+            x[1]  = PCM(1 + 3 * nchannels);
+            x[2]  = PCM(1 + 6 * nchannels);
+            x[3]  = PCM(1 + 4 * nchannels);
+            x[4]  = PCM(1 + 0 * nchannels);
+            x[5]  = PCM(1 + 2 * nchannels);
+            x[6]  = PCM(1 + 1 * nchannels);
+            x[7]  = PCM(1 + 5 * nchannels);
+        }
+        pcm += 16 * nchannels; /* 8 samples of 2 bytes for each channel */
+    }
+
+    return position;
+}
+
+static av_always_inline int sbc_encoder_process_input_s8_internal(
+    int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+    int nsamples, int nchannels)
+{   /* stores reordered samples below "position" in X; returns the new position */
+    /* handle X buffer wraparound: fewer than nsamples slots remain below position */
+    if (position < nsamples) {
+        if (nchannels > 0)
+            memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position],
+                            72 * sizeof(int16_t));
+        if (nchannels > 1)
+            memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position],
+                            72 * sizeof(int16_t));
+        position = SBC_X_BUFFER_SIZE - 72; /* continue filling downwards from the moved copy */
+    }
+
+    if (position % 16 == 8) { /* half-group pending: consume 8 samples per channel first */
+        position -= 8;
+        nsamples -= 8;
+        if (nchannels > 0) {
+            int16_t *x = &X[0][position];
+            x[0]  = PCM(0 + (15-8) * nchannels); /* NOTE(review): x[1] is skipped; it is the slot the tail below fills via x[-7] - confirm pairing */
+            x[2]  = PCM(0 + (14-8) * nchannels);
+            x[3]  = PCM(0 + (8-8) * nchannels);
+            x[4]  = PCM(0 + (13-8) * nchannels);
+            x[5]  = PCM(0 + (9-8) * nchannels);
+            x[6]  = PCM(0 + (12-8) * nchannels);
+            x[7]  = PCM(0 + (10-8) * nchannels);
+            x[8]  = PCM(0 + (11-8) * nchannels); /* x[8] is the x[0] slot the tail below leaves unwritten */
+        }
+        if (nchannels > 1) {
+            int16_t *x = &X[1][position];
+            x[0]  = PCM(1 + (15-8) * nchannels);
+            x[2]  = PCM(1 + (14-8) * nchannels);
+            x[3]  = PCM(1 + (8-8) * nchannels);
+            x[4]  = PCM(1 + (13-8) * nchannels);
+            x[5]  = PCM(1 + (9-8) * nchannels);
+            x[6]  = PCM(1 + (12-8) * nchannels);
+            x[7]  = PCM(1 + (10-8) * nchannels);
+            x[8]  = PCM(1 + (11-8) * nchannels);
+        }
+
+        pcm += 16 * nchannels; /* 8 samples of 2 bytes for each channel */
+    }
+
+    /* copy/permute audio samples, 16 per channel per iteration */
+    while (nsamples >= 16) {
+        position -= 16;
+        if (nchannels > 0) {
+            int16_t *x = &X[0][position];
+            x[0]  = PCM(0 + 15 * nchannels); /* PCM index = channel + sample * nchannels */
+            x[1]  = PCM(0 + 7 * nchannels);
+            x[2]  = PCM(0 + 14 * nchannels);
+            x[3]  = PCM(0 + 8 * nchannels);
+            x[4]  = PCM(0 + 13 * nchannels);
+            x[5]  = PCM(0 + 9 * nchannels);
+            x[6]  = PCM(0 + 12 * nchannels);
+            x[7]  = PCM(0 + 10 * nchannels);
+            x[8]  = PCM(0 + 11 * nchannels);
+            x[9]  = PCM(0 + 3 * nchannels);
+            x[10] = PCM(0 + 6 * nchannels);
+            x[11] = PCM(0 + 0 * nchannels);
+            x[12] = PCM(0 + 5 * nchannels);
+            x[13] = PCM(0 + 1 * nchannels);
+            x[14] = PCM(0 + 4 * nchannels);
+            x[15] = PCM(0 + 2 * nchannels);
+        }
+        if (nchannels > 1) {
+            int16_t *x = &X[1][position];
+            x[0]  = PCM(1 + 15 * nchannels);
+            x[1]  = PCM(1 + 7 * nchannels);
+            x[2]  = PCM(1 + 14 * nchannels);
+            x[3]  = PCM(1 + 8 * nchannels);
+            x[4]  = PCM(1 + 13 * nchannels);
+            x[5]  = PCM(1 + 9 * nchannels);
+            x[6]  = PCM(1 + 12 * nchannels);
+            x[7]  = PCM(1 + 10 * nchannels);
+            x[8]  = PCM(1 + 11 * nchannels);
+            x[9]  = PCM(1 + 3 * nchannels);
+            x[10] = PCM(1 + 6 * nchannels);
+            x[11] = PCM(1 + 0 * nchannels);
+            x[12] = PCM(1 + 5 * nchannels);
+            x[13] = PCM(1 + 1 * nchannels);
+            x[14] = PCM(1 + 4 * nchannels);
+            x[15] = PCM(1 + 2 * nchannels);
+        }
+        pcm += 32 * nchannels; /* 16 samples of 2 bytes for each channel */
+        nsamples -= 16;
+    }
+
+    if (nsamples == 8) { /* tail: 8 samples left, leaves position % 16 == 8 for the next call */
+        position -= 8;
+        if (nchannels > 0) {
+            int16_t *x = &X[0][position];
+            x[-7] = PCM(0 + 7 * nchannels); /* stored 7 entries below the new position; x[0] is not written */
+            x[1]  = PCM(0 + 3 * nchannels);
+            x[2]  = PCM(0 + 6 * nchannels);
+            x[3]  = PCM(0 + 0 * nchannels);
+            x[4]  = PCM(0 + 5 * nchannels);
+            x[5]  = PCM(0 + 1 * nchannels);
+            x[6]  = PCM(0 + 4 * nchannels);
+            x[7]  = PCM(0 + 2 * nchannels);
+        }
+        if (nchannels > 1) {
+            int16_t *x = &X[1][position];
+            x[-7] = PCM(1 + 7 * nchannels);
+            x[1]  = PCM(1 + 3 * nchannels);
+            x[2]  = PCM(1 + 6 * nchannels);
+            x[3]  = PCM(1 + 0 * nchannels);
+            x[4]  = PCM(1 + 5 * nchannels);
+            x[5]  = PCM(1 + 1 * nchannels);
+            x[6]  = PCM(1 + 4 * nchannels);
+            x[7]  = PCM(1 + 2 * nchannels);
+        }
+    }
+
+    return position;
+}
+
+/*
+ * Input data processing functions. The data is endian converted if needed,
+ * channels are deinterleaved and audio samples are reordered for use in
+ * SIMD-friendly analysis filter function. The results are put into "X"
+ * array, getting appended to the previous data (or it is better to say
+ * prepended, as the buffer is filled from top to bottom). Old data is
+ * discarded when needed, but availability of (10 * nrof_subbands)
+ * contiguous samples is always guaranteed for the input to the analysis
+ * filter. This is achieved by copying a sufficient part of old data
+ * to the top of the buffer on buffer wraparound.
+ */
+
+static int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{   /* returns the new buffer position computed by the s4 helper */
+    if (nchannels > 1) /* literal channel counts give the always-inline helper a constant */
+        return sbc_encoder_process_input_s4_internal(
+            position, pcm, X, nsamples, 2);
+    else /* any nchannels <= 1 is handled as one channel */
+        return sbc_encoder_process_input_s4_internal(
+            position, pcm, X, nsamples, 1);
+}
+
+static int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels)
+{   /* returns the new buffer position computed by the s8 helper */
+    if (nchannels > 1) /* literal channel counts give the always-inline helper a constant */
+        return sbc_encoder_process_input_s8_internal(
+            position, pcm, X, nsamples, 2);
+    else /* any nchannels <= 1 is handled as one channel */
+        return sbc_encoder_process_input_s8_internal(
+            position, pcm, X, nsamples, 1);
+}
+
+static void sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands)
+{   /* fills scale_factor[ch][sb] from the magnitudes of sb_sample_f[0..blocks-1][ch][sb] */
+    int ch, sb, blk;
+    for (ch = 0; ch < channels; ch++) {
+        for (sb = 0; sb < subbands; sb++) {
+            uint32_t x = 1 << SCALE_OUT_BITS; /* seed bit: x is never zero when passed to ff_clz */
+            for (blk = 0; blk < blocks; blk++) {
+                int32_t tmp = FFABS(sb_sample_f[blk][ch][sb]);
+                if (tmp != 0)
+                    x |= tmp - 1; /* OR together (magnitude - 1) of every non-zero sample */
+            }
+            scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) -
+                ff_clz(x); /* assumes ff_clz counts leading zero bits of a 32-bit value - TODO confirm */
+        }
+    }
+}
+
+static int sbc_calc_scalefactors_j(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands)
+{   /* two-channel scale factors with a per-subband joint decision; may rewrite sb_sample_f */
+    int blk, joint = 0;
+    int32_t tmp0, tmp1;
+    uint32_t x, y;
+
+    /* last subband does not use joint stereo */
+    int sb = subbands - 1;
+    x = 1 << SCALE_OUT_BITS;
+    y = 1 << SCALE_OUT_BITS;
+    for (blk = 0; blk < blocks; blk++) {
+        tmp0 = FFABS(sb_sample_f[blk][0][sb]);
+        tmp1 = FFABS(sb_sample_f[blk][1][sb]);
+        if (tmp0 != 0)
+            x |= tmp0 - 1;
+        if (tmp1 != 0)
+            y |= tmp1 - 1;
+    }
+    scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x);
+    scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - ff_clz(y);
+
+    /* the rest of subbands can use joint stereo */
+    while (--sb >= 0) {
+        int32_t sb_sample_j[16][2]; /* per block: [0] half-sum, [1] half-difference of the two channels */
+        x = 1 << SCALE_OUT_BITS;
+        y = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            tmp0 = sb_sample_f[blk][0][sb];
+            tmp1 = sb_sample_f[blk][1][sb];
+            sb_sample_j[blk][0] = (tmp0 >> 1) + (tmp1 >> 1);
+            sb_sample_j[blk][1] = (tmp0 >> 1) - (tmp1 >> 1);
+            tmp0 = FFABS(tmp0);
+            tmp1 = FFABS(tmp1);
+            if (tmp0 != 0)
+                x |= tmp0 - 1;
+            if (tmp1 != 0)
+                y |= tmp1 - 1;
+        }
+        scale_factor[0][sb] = (31 - SCALE_OUT_BITS) -
+            ff_clz(x); /* scale factors of the unmodified channels */
+        scale_factor[1][sb] = (31 - SCALE_OUT_BITS) -
+            ff_clz(y);
+        x = 1 << SCALE_OUT_BITS;
+        y = 1 << SCALE_OUT_BITS;
+        for (blk = 0; blk < blocks; blk++) {
+            tmp0 = FFABS(sb_sample_j[blk][0]);
+            tmp1 = FFABS(sb_sample_j[blk][1]);
+            if (tmp0 != 0)
+                x |= tmp0 - 1;
+            if (tmp1 != 0)
+                y |= tmp1 - 1;
+        }
+        x = (31 - SCALE_OUT_BITS) - ff_clz(x); /* x and y now hold the sum/difference scale factors */
+        y = (31 - SCALE_OUT_BITS) - ff_clz(y);
+
+        /* decide whether to use joint stereo for this subband: only if the summed scale factors shrink */
+        if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) {
+            joint |= 1 << (subbands - 1 - sb); /* subband 0 maps to bit subbands - 1 */
+            scale_factor[0][sb] = x;
+            scale_factor[1][sb] = y;
+            for (blk = 0; blk < blocks; blk++) { /* replace the samples with their sum/difference form */
+                sb_sample_f[blk][0][sb] = sb_sample_j[blk][0];
+                sb_sample_f[blk][1][sb] = sb_sample_j[blk][1];
+            }
+        }
+    }
+
+    /* bitmask with the information about subbands using joint stereo */
+    return joint;
+}
+
+/*
+ * Detect CPU features and setup function pointers
+ */
+av_cold void ff_sbcdsp_init(SBCDSPContext *s)
+{   /* s->increment is read below, so the caller must set it before this call */
+    /* Default implementation for analyze functions */
+    s->sbc_analyze_4 = sbc_analyze_4_simd;
+    s->sbc_analyze_8 = sbc_analyze_8_simd;
+    s->sbc_analyze_4s = sbc_analyze_4b_4s_simd; /* the 4-subband path always uses the 4-block handler */
+    if (s->increment == 1)
+        s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd; /* one block per call, starting with the odd table */
+    else
+        s->sbc_analyze_8s = sbc_analyze_4b_8s_simd;
+
+    /* Default implementation for input reordering / deinterleaving */
+    s->sbc_enc_process_input_4s = sbc_enc_process_input_4s;
+    s->sbc_enc_process_input_8s = sbc_enc_process_input_8s;
+
+    /* Default implementation for scale factors calculation */
+    s->sbc_calc_scalefactors = sbc_calc_scalefactors;
+    s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
+
+    if (ARCH_ARM) /* NOTE(review): presumably replaces the pointers above with ARM versions - confirm */
+        ff_sbcdsp_init_arm(s);
+    if (ARCH_X86) /* NOTE(review): presumably replaces the pointers above with x86 versions - confirm */
+        ff_sbcdsp_init_x86(s);
+}
diff --git a/libavcodec/sbcdsp.h b/libavcodec/sbcdsp.h
new file mode 100644
index 0000000000..334c058e6d
--- /dev/null
+++ b/libavcodec/sbcdsp.h
@@ -0,0 +1,86 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC basic "building bricks"
+ */
+
+#ifndef AVCODEC_SBCDSP_H
+#define AVCODEC_SBCDSP_H
+
+#include "sbc.h"
+#include "sbcdsp_data.h"
+
+#define SCALE_OUT_BITS 15 /* used in the scale factor and quantization level shifts */
+#define SBC_X_BUFFER_SIZE 328 /* number of int16_t entries in each X[] channel buffer */
+
+typedef struct sbc_dsp_context SBCDSPContext;
+/* Input sample buffers and function pointers used by the SBC encoder */
+struct sbc_dsp_context {
+    int position; /* index into X[ch][]; the encoder stores what sbc_enc_process_input_*() returns */
+    /* Number of consecutive blocks handled by one sbc_analyze_4s / 8s call */
+    uint8_t increment;
+    DECLARE_ALIGNED(SBC_ALIGN, int16_t, X)[2][SBC_X_BUFFER_SIZE]; /* input samples, [channel][index] */
+    void (*sbc_analyze_4)(const int16_t *in, int32_t *out, const int16_t *consts);
+    void (*sbc_analyze_8)(const int16_t *in, int32_t *out, const int16_t *consts);
+    /* Polyphase analysis filter for 4 subbands configuration,
+     * it handles "increment" blocks at once */
+    void (*sbc_analyze_4s)(SBCDSPContext *s,
+                           int16_t *x, int32_t *out, int out_stride);
+    /* Polyphase analysis filter for 8 subbands configuration,
+     * it handles "increment" blocks at once */
+    void (*sbc_analyze_8s)(SBCDSPContext *s,
+                           int16_t *x, int32_t *out, int out_stride);
+    /* Process input data (deinterleave, endian conversion, reordering) for
+     * 4 or 8 subbands; the encoder keeps the return value as "position" */
+    int (*sbc_enc_process_input_4s)(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels);
+    int (*sbc_enc_process_input_8s)(int position, const uint8_t *pcm,
+                                    int16_t X[2][SBC_X_BUFFER_SIZE],
+                                    int nsamples, int nchannels);
+    /* Scale factors calculation */
+    void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands);
+    /* Same with joint stereo decision; returns the joint subbands bitmask */
+    int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8],
+                                   uint32_t scale_factor[2][8],
+                                   int blocks, int subbands);
+};
+
+/*
+ * Initialize pointers to the functions which are the basic "building bricks"
+ * of SBC codec. Best implementation is selected based on target CPU
+ * capabilities.
+ */
+void ff_sbcdsp_init(SBCDSPContext *s);
+
+void ff_sbcdsp_init_arm(SBCDSPContext *s); /* called by ff_sbcdsp_init() if ARCH_ARM */
+void ff_sbcdsp_init_x86(SBCDSPContext *s); /* called by ff_sbcdsp_init() if ARCH_X86 */
+
+#endif /* AVCODEC_SBCDSP_H */
diff --git a/libavcodec/sbcdsp_data.c b/libavcodec/sbcdsp_data.c
new file mode 100644
index 0000000000..3007a23bc7
--- /dev/null
+++ b/libavcodec/sbcdsp_data.c
@@ -0,0 +1,335 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#include "sbcdsp_data.h"
+
+/* Real to fixed point: scale by 2^(bits of int16_t - 1) and add 0.5 before
+ * the truncating cast. The PROTO flavours store twice the value. */
+#define F_COS4(x) ((int32_t) ((x) * \
+    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5))
+#define F_PROTO4(x) ((int32_t) (((x) * 2) * \
+    ((int32_t) 1 << (sizeof(int16_t) * CHAR_BIT - 1)) + 0.5))
+#define F_COS8(x)   F_COS4(x)
+#define F_PROTO8(x) F_PROTO4(x)
+
+/*
+ * Constant tables for the use in SIMD optimized analysis filters
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_even)[40 + 16] = {
+#define C0 1.0932568993
+#define C1 1.3056875580
+#define C2 1.3056875580
+#define C3 1.6772280856
+
+#define F(x) F_PROTO4(x) /* 40 "proto" entries, each multiplied by one of C0..C3 */
+     F(0.00000000E+00 * C0),  F(3.83720193E-03 * C0),
+     F(5.36548976E-04 * C1),  F(2.73370904E-03 * C1),
+     F(3.06012286E-03 * C2),  F(3.89205149E-03 * C2),
+     F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3),
+     F(1.09137620E-02 * C0),  F(2.58767811E-02 * C0),
+     F(2.04385087E-02 * C1),  F(3.21939290E-02 * C1),
+     F(7.76463494E-02 * C2),  F(6.13245186E-03 * C2),
+     F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3),
+     F(1.35593274E-01 * C0),  F(2.94315332E-01 * C0),
+     F(1.94987841E-01 * C1),  F(2.81828203E-01 * C1),
+    -F(1.94987841E-01 * C2),  F(2.81828203E-01 * C2),
+     F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3),
+    -F(1.35593274E-01 * C0),  F(2.58767811E-02 * C0),
+    -F(7.76463494E-02 * C1),  F(6.13245186E-03 * C1),
+    -F(2.04385087E-02 * C2),  F(3.21939290E-02 * C2),
+     F(0.00000000E+00 * C3),  F(2.88217274E-02 * C3),
+    -F(1.09137620E-02 * C0),  F(3.83720193E-03 * C0),
+    -F(3.06012286E-03 * C1),  F(3.89205149E-03 * C1),
+    -F(5.36548976E-04 * C2),  F(2.73370904E-03 * C2),
+     F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3),
+#undef F
+#define F(x) F_COS4(x) /* 16 "cos" entries, each divided by one of C0..C3 */
+     F(0.7071067812 / C0),  F(0.9238795325 / C1),
+    -F(0.7071067812 / C0),  F(0.3826834324 / C1),
+    -F(0.7071067812 / C0), -F(0.3826834324 / C1),
+     F(0.7071067812 / C0), -F(0.9238795325 / C1),
+     F(0.3826834324 / C2), -F(1.0000000000 / C3),
+    -F(0.9238795325 / C2), -F(1.0000000000 / C3),
+     F(0.9238795325 / C2), -F(1.0000000000 / C3),
+    -F(0.3826834324 / C2), -F(1.0000000000 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed4_simd_odd)[40 + 16] = {
+#define C0 1.3056875580
+#define C1 1.6772280856
+#define C2 1.0932568993
+#define C3 1.3056875580
+
+#define F(x) F_PROTO4(x) /* 40 "proto" entries, each multiplied by one of C0..C3 */
+     F(2.73370904E-03 * C0),  F(5.36548976E-04 * C0),
+    -F(1.49188357E-03 * C1),  F(0.00000000E+00 * C1),
+     F(3.83720193E-03 * C2),  F(1.09137620E-02 * C2),
+     F(3.89205149E-03 * C3),  F(3.06012286E-03 * C3),
+     F(3.21939290E-02 * C0),  F(2.04385087E-02 * C0),
+    -F(2.88757392E-02 * C1),  F(0.00000000E+00 * C1),
+     F(2.58767811E-02 * C2),  F(1.35593274E-01 * C2),
+     F(6.13245186E-03 * C3),  F(7.76463494E-02 * C3),
+     F(2.81828203E-01 * C0),  F(1.94987841E-01 * C0),
+    -F(2.46636662E-01 * C1),  F(0.00000000E+00 * C1),
+     F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2),
+     F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3),
+     F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0),
+     F(2.88217274E-02 * C1),  F(0.00000000E+00 * C1),
+     F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2),
+     F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3),
+     F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0),
+    -F(1.86581691E-03 * C1),  F(0.00000000E+00 * C1),
+     F(3.83720193E-03 * C2),  F(0.00000000E+00 * C2),
+     F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3),
+#undef F
+#define F(x) F_COS4(x) /* 16 "cos" entries, each divided by one of C0..C3 */
+     F(0.9238795325 / C0), -F(1.0000000000 / C1),
+     F(0.3826834324 / C0), -F(1.0000000000 / C1),
+    -F(0.3826834324 / C0), -F(1.0000000000 / C1),
+    -F(0.9238795325 / C0), -F(1.0000000000 / C1),
+     F(0.7071067812 / C2),  F(0.3826834324 / C3),
+    -F(0.7071067812 / C2), -F(0.9238795325 / C3),
+    -F(0.7071067812 / C2),  F(0.9238795325 / C3),
+     F(0.7071067812 / C2), -F(0.3826834324 / C3),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_even)[80 + 64] = {
+#define C0 2.7906148894
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.5377944043
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+#define F(x) F_PROTO8(x) /* 80 "proto" entries, each multiplied by one of C0..C7 */
+     F(0.00000000E+00 * C0),  F(2.01182542E-03 * C0),
+     F(1.56575398E-04 * C1),  F(1.78371725E-03 * C1),
+     F(3.43256425E-04 * C2),  F(1.47640169E-03 * C2),
+     F(5.54620202E-04 * C3),  F(1.13992507E-03 * C3),
+    -F(8.23919506E-04 * C4),  F(0.00000000E+00 * C4),
+     F(2.10371989E-03 * C5),  F(3.49717454E-03 * C5),
+     F(1.99454554E-03 * C6),  F(1.64973098E-03 * C6),
+     F(1.61656283E-03 * C7),  F(1.78805361E-04 * C7),
+     F(5.65949473E-03 * C0),  F(1.29371806E-02 * C0),
+     F(8.02941163E-03 * C1),  F(1.53184106E-02 * C1),
+     F(1.04584443E-02 * C2),  F(1.62208471E-02 * C2),
+     F(1.27472335E-02 * C3),  F(1.59045603E-02 * C3),
+    -F(1.46525263E-02 * C4),  F(0.00000000E+00 * C4),
+     F(8.85757540E-03 * C5),  F(5.31873032E-02 * C5),
+     F(2.92408442E-03 * C6),  F(3.90751381E-02 * C6),
+    -F(4.91578024E-03 * C7),  F(2.61098752E-02 * C7),
+     F(6.79989431E-02 * C0),  F(1.46955068E-01 * C0),
+     F(8.29847578E-02 * C1),  F(1.45389847E-01 * C1),
+     F(9.75753918E-02 * C2),  F(1.40753505E-01 * C2),
+     F(1.11196689E-01 * C3),  F(1.33264415E-01 * C3),
+    -F(1.23264548E-01 * C4),  F(0.00000000E+00 * C4),
+     F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+     F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+     F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+    -F(6.79989431E-02 * C0),  F(1.29371806E-02 * C0),
+    -F(5.31873032E-02 * C1),  F(8.85757540E-03 * C1),
+    -F(3.90751381E-02 * C2),  F(2.92408442E-03 * C2),
+    -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+     F(1.46404076E-02 * C4),  F(0.00000000E+00 * C4),
+     F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+     F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+     F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+    -F(5.65949473E-03 * C0),  F(2.01182542E-03 * C0),
+    -F(3.49717454E-03 * C1),  F(2.10371989E-03 * C1),
+    -F(1.64973098E-03 * C2),  F(1.99454554E-03 * C2),
+    -F(1.78805361E-04 * C3),  F(1.61656283E-03 * C3),
+    -F(9.02154502E-04 * C4),  F(0.00000000E+00 * C4),
+     F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+     F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+     F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+#define F(x) F_COS8(x) /* 64 "cos" entries, each divided by one of C0..C7 */
+     F(0.7071067812 / C0),  F(0.8314696123 / C1),
+    -F(0.7071067812 / C0), -F(0.1950903220 / C1),
+    -F(0.7071067812 / C0), -F(0.9807852804 / C1),
+     F(0.7071067812 / C0), -F(0.5555702330 / C1),
+     F(0.7071067812 / C0),  F(0.5555702330 / C1),
+    -F(0.7071067812 / C0),  F(0.9807852804 / C1),
+    -F(0.7071067812 / C0),  F(0.1950903220 / C1),
+     F(0.7071067812 / C0), -F(0.8314696123 / C1),
+     F(0.9238795325 / C2),  F(0.9807852804 / C3),
+     F(0.3826834324 / C2),  F(0.8314696123 / C3),
+    -F(0.3826834324 / C2),  F(0.5555702330 / C3),
+    -F(0.9238795325 / C2),  F(0.1950903220 / C3),
+    -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+    -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+     F(0.3826834324 / C2), -F(0.8314696123 / C3),
+     F(0.9238795325 / C2), -F(0.9807852804 / C3),
+    -F(1.0000000000 / C4),  F(0.5555702330 / C5),
+    -F(1.0000000000 / C4), -F(0.9807852804 / C5),
+    -F(1.0000000000 / C4),  F(0.1950903220 / C5),
+    -F(1.0000000000 / C4),  F(0.8314696123 / C5),
+    -F(1.0000000000 / C4), -F(0.8314696123 / C5),
+    -F(1.0000000000 / C4), -F(0.1950903220 / C5),
+    -F(1.0000000000 / C4),  F(0.9807852804 / C5),
+    -F(1.0000000000 / C4), -F(0.5555702330 / C5),
+     F(0.3826834324 / C6),  F(0.1950903220 / C7),
+    -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+     F(0.9238795325 / C6),  F(0.8314696123 / C7),
+    -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+    -F(0.3826834324 / C6),  F(0.9807852804 / C7),
+     F(0.9238795325 / C6), -F(0.8314696123 / C7),
+    -F(0.9238795325 / C6),  F(0.5555702330 / C7),
+     F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
+
+DECLARE_ALIGNED(SBC_ALIGN, const int16_t, ff_sbcdsp_analysis_consts_fixed8_simd_odd)[80 + 64] = {
+#define C0 2.5377944043
+#define C1 2.4270044280
+#define C2 2.8015616024
+#define C3 3.1710363741
+#define C4 2.7906148894
+#define C5 2.4270044280
+#define C6 2.8015616024
+#define C7 3.1710363741
+
+#define F(x) F_PROTO8(x) /* 80 "proto" entries, each multiplied by one of C0..C7 */
+     F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0),
+     F(1.56575398E-04 * C1),  F(1.78371725E-03 * C1),
+     F(3.43256425E-04 * C2),  F(1.47640169E-03 * C2),
+     F(5.54620202E-04 * C3),  F(1.13992507E-03 * C3),
+     F(2.01182542E-03 * C4),  F(5.65949473E-03 * C4),
+     F(2.10371989E-03 * C5),  F(3.49717454E-03 * C5),
+     F(1.99454554E-03 * C6),  F(1.64973098E-03 * C6),
+     F(1.61656283E-03 * C7),  F(1.78805361E-04 * C7),
+     F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0),
+     F(8.02941163E-03 * C1),  F(1.53184106E-02 * C1),
+     F(1.04584443E-02 * C2),  F(1.62208471E-02 * C2),
+     F(1.27472335E-02 * C3),  F(1.59045603E-02 * C3),
+     F(1.29371806E-02 * C4),  F(6.79989431E-02 * C4),
+     F(8.85757540E-03 * C5),  F(5.31873032E-02 * C5),
+     F(2.92408442E-03 * C6),  F(3.90751381E-02 * C6),
+    -F(4.91578024E-03 * C7),  F(2.61098752E-02 * C7),
+     F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0),
+     F(8.29847578E-02 * C1),  F(1.45389847E-01 * C1),
+     F(9.75753918E-02 * C2),  F(1.40753505E-01 * C2),
+     F(1.11196689E-01 * C3),  F(1.33264415E-01 * C3),
+     F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4),
+     F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5),
+     F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6),
+     F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7),
+     F(0.00000000E+00 * C0),  F(1.46404076E-02 * C0),
+    -F(5.31873032E-02 * C1),  F(8.85757540E-03 * C1),
+    -F(3.90751381E-02 * C2),  F(2.92408442E-03 * C2),
+    -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3),
+     F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4),
+     F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5),
+     F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6),
+     F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7),
+     F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0),
+    -F(3.49717454E-03 * C1),  F(2.10371989E-03 * C1),
+    -F(1.64973098E-03 * C2),  F(1.99454554E-03 * C2),
+    -F(1.78805361E-04 * C3),  F(1.61656283E-03 * C3),
+     F(2.01182542E-03 * C4),  F(0.00000000E+00 * C4),
+     F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5),
+     F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6),
+     F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7),
+#undef F
+#define F(x) F_COS8(x) /* 64 "cos" entries, each divided by one of C0..C7 */
+    -F(1.0000000000 / C0),  F(0.8314696123 / C1),
+    -F(1.0000000000 / C0), -F(0.1950903220 / C1),
+    -F(1.0000000000 / C0), -F(0.9807852804 / C1),
+    -F(1.0000000000 / C0), -F(0.5555702330 / C1),
+    -F(1.0000000000 / C0),  F(0.5555702330 / C1),
+    -F(1.0000000000 / C0),  F(0.9807852804 / C1),
+    -F(1.0000000000 / C0),  F(0.1950903220 / C1),
+    -F(1.0000000000 / C0), -F(0.8314696123 / C1),
+     F(0.9238795325 / C2),  F(0.9807852804 / C3),
+     F(0.3826834324 / C2),  F(0.8314696123 / C3),
+    -F(0.3826834324 / C2),  F(0.5555702330 / C3),
+    -F(0.9238795325 / C2),  F(0.1950903220 / C3),
+    -F(0.9238795325 / C2), -F(0.1950903220 / C3),
+    -F(0.3826834324 / C2), -F(0.5555702330 / C3),
+     F(0.3826834324 / C2), -F(0.8314696123 / C3),
+     F(0.9238795325 / C2), -F(0.9807852804 / C3),
+     F(0.7071067812 / C4),  F(0.5555702330 / C5),
+    -F(0.7071067812 / C4), -F(0.9807852804 / C5),
+    -F(0.7071067812 / C4),  F(0.1950903220 / C5),
+     F(0.7071067812 / C4),  F(0.8314696123 / C5),
+     F(0.7071067812 / C4), -F(0.8314696123 / C5),
+    -F(0.7071067812 / C4), -F(0.1950903220 / C5),
+    -F(0.7071067812 / C4),  F(0.9807852804 / C5),
+     F(0.7071067812 / C4), -F(0.5555702330 / C5),
+     F(0.3826834324 / C6),  F(0.1950903220 / C7),
+    -F(0.9238795325 / C6), -F(0.5555702330 / C7),
+     F(0.9238795325 / C6),  F(0.8314696123 / C7),
+    -F(0.3826834324 / C6), -F(0.9807852804 / C7),
+    -F(0.3826834324 / C6),  F(0.9807852804 / C7),
+     F(0.9238795325 / C6), -F(0.8314696123 / C7),
+    -F(0.9238795325 / C6),  F(0.5555702330 / C7),
+     F(0.3826834324 / C6), -F(0.1950903220 / C7),
+#undef F
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+};
diff --git a/libavcodec/sbcdsp_data.h b/libavcodec/sbcdsp_data.h
new file mode 100644
index 0000000000..12839fb3c3
--- /dev/null
+++ b/libavcodec/sbcdsp_data.h
@@ -0,0 +1,57 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * miscellaneous SBC tables
+ */
+
+#ifndef AVCODEC_SBCDSP_DATA_H
+#define AVCODEC_SBCDSP_DATA_H
+
+#include "sbc.h"
+
+#define SBC_PROTO_FIXED4_SCALE      ((sizeof(int16_t) * CHAR_BIT - 1) + 1) /* F_PROTO4() scales by 2 * 2^15 */
+#define SBC_COS_TABLE_FIXED4_SCALE  ((sizeof(int16_t) * CHAR_BIT - 1)    ) /* F_COS4() scales by 2^15 */
+#define SBC_PROTO_FIXED8_SCALE      ((sizeof(int16_t) * CHAR_BIT - 1) + 1) /* F_PROTO8() scales by 2 * 2^15 */
+#define SBC_COS_TABLE_FIXED8_SCALE  ((sizeof(int16_t) * CHAR_BIT - 1)    ) /* F_COS8() scales by 2^15 */
+
+/*
+ * Constant tables for the use in SIMD optimized analysis filters
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_even[]; /* defined with [40 + 16] entries */
+extern const int16_t ff_sbcdsp_analysis_consts_fixed4_simd_odd[];  /* defined with [40 + 16] entries */
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_even[]; /* defined with [80 + 64] entries */
+extern const int16_t ff_sbcdsp_analysis_consts_fixed8_simd_odd[];  /* defined with [80 + 64] entries */
+
+#endif /* AVCODEC_SBCDSP_DATA_H */
diff --git a/libavcodec/sbcenc.c b/libavcodec/sbcenc.c
new file mode 100644
index 0000000000..94a0331495
--- /dev/null
+++ b/libavcodec/sbcenc.c
@@ -0,0 +1,461 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2012-2013  Intel Corporation
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC encoder implementation
+ */
+
+#include <stdbool.h>
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "put_bits.h"
+#include "sbc.h"
+#include "sbcdsp.h"
+
+typedef struct SBCEncContext {
+    AVClass *class;
+
+    uint8_t frequency;  /* SBC_FREQ_* code; sbc_encode_init() uses the sample rate index */
+    int blocks;         /* sbc_encode_init() replaces the value with (blocks >> 2) - 1 */
+    int subbands;       /* sbc_encode_init() replaces the value with subbands >> 3 */
+    uint8_t mode;       /* SBC_MODE_* */
+    int allocation;     /* msbc_encode_init() forces SBC_AM_LOUDNESS */
+    int bitpool;        /* compared with frame.bitpool on every sbc_encode_frame() call */
+
+    int joint_stereo;   /* "joint_stereo" option, can't be combined with dual_channel */
+    int dual_channel;   /* "dual_channel" option, can't be combined with joint_stereo */
+
+    bool init;          /* set by the first sbc_encode_frame() call once frame and dsp are set up */
+    bool msbc;          /* set by msbc_encode_init() */
+    DECLARE_ALIGNED(SBC_ALIGN, struct sbc_frame, frame);
+    DECLARE_ALIGNED(SBC_ALIGN, SBCDSPContext, dsp);
+    size_t (*pack_frame)(AVPacket *avpkt, struct sbc_frame *frame, int joint); /* sbc_pack_frame or msbc_pack_frame */
+} SBCEncContext;
+
+/*
+ * Run the analysis filter bank on the samples buffered in s->X and store
+ * the subband samples in frame->sb_sample_f. Each filter call handles
+ * s->increment blocks.
+ * Returns blocks * subbands, or AVERROR(EIO) when frame->subbands is
+ * neither 4 nor 8.
+ */
+static int sbc_analyze_audio(SBCDSPContext *s, struct sbc_frame *frame)
+{
+    const int nb_sb = frame->subbands;
+    void (*analyze)(SBCDSPContext *s,
+                    int16_t *x, int32_t *out, int out_stride);
+    int channel, block;
+
+    /* pick the filter matching the number of subbands */
+    if (nb_sb == 4) {
+        analyze = s->sbc_analyze_4s;
+    } else if (nb_sb == 8) {
+        analyze = s->sbc_analyze_8s;
+    } else {
+        return AVERROR(EIO);
+    }
+
+    for (channel = 0; channel < frame->channels; channel++) {
+        /* the first call reads from the highest offset, each following
+         * one steps back by the samples of s->increment blocks */
+        int16_t *in = &s->X[channel][s->position - nb_sb * s->increment +
+                                     frame->blocks * nb_sb];
+
+        for (block = 0; block < frame->blocks; block += s->increment) {
+            int32_t *out = frame->sb_sample_f[block][channel];
+
+            /* out_stride: distance, in int32_t, to the next block's output */
+            analyze(s, in, out,
+                    frame->sb_sample_f[block + 1][channel] - out);
+            in -= nb_sb * s->increment;
+        }
+    }
+
+    /* number of subband samples stored per channel */
+    return frame->blocks * nb_sb;
+}
+
+/*
+ * Pack the joint mask, scale factors and quantized samples of frame after the
+ * 4 header bytes of avpkt; byte 3 gets the CRC-8 of bytes 1-2 (set by the
+ * caller), mask and scale factors. Returns the byte count after the header.
+ */
+static av_always_inline size_t sbc_pack_frame_internal(AVPacket *avpkt,
+                    struct sbc_frame *frame, int frame_subbands,
+                    int frame_channels, int joint)
+{
+    PutBitContext pb;
+
+    /* Will copy the header parts for CRC-8 calculation here */
+    uint8_t crc_header[11] = { 0 };
+    int crc_pos = 0;        /* number of bits stored in crc_header */
+
+    uint32_t audio_sample;
+
+    int ch, sb, blk;        /* channel, subband and block counters */
+    int bits[2][8];         /* bits distribution */
+    uint32_t levels[2][8];  /* levels are derived from that */
+    uint32_t sb_sample_delta[2][8];
+
+    /* Can't fill in crc yet, but it starts with header bytes 1 and 2 */
+    crc_header[0] = avpkt->data[1];
+    crc_header[1] = avpkt->data[2];
+    crc_pos = 16;
+    /* writing starts after the 4 header bytes: only size - 4 bytes are left */
+    init_put_bits(&pb, avpkt->data + 4, avpkt->size - 4);
+
+    if (frame->mode == JOINT_STEREO) {
+        put_bits(&pb, frame_subbands, joint);
+        crc_header[crc_pos >> 3] = joint;
+        crc_pos += frame_subbands;
+    }
+
+    for (ch = 0; ch < frame_channels; ch++) {
+        for (sb = 0; sb < frame_subbands; sb++) {
+            put_bits(&pb, 4, frame->scale_factor[ch][sb] & 0x0F);
+            crc_header[crc_pos >> 3] <<= 4;
+            crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F;
+            crc_pos += 4;
+        }
+    }
+
+    /* align the last crc byte */
+    if (crc_pos % 8)
+        crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8);
+
+    avpkt->data[3] = ff_sbc_crc8(crc_header, crc_pos);
+
+    ff_sbc_calculate_bits(frame, bits);
+
+    for (ch = 0; ch < frame_channels; ch++) {
+        for (sb = 0; sb < frame_subbands; sb++) {
+            levels[ch][sb] = ((1 << bits[ch][sb]) - 1) <<
+                (32 - (frame->scale_factor[ch][sb] +
+                    SCALE_OUT_BITS + 2));
+            sb_sample_delta[ch][sb] = (uint32_t) 1 <<
+                (frame->scale_factor[ch][sb] +
+                    SCALE_OUT_BITS + 1);
+        }
+    }
+
+    for (blk = 0; blk < frame->blocks; blk++) {
+        for (ch = 0; ch < frame_channels; ch++) {
+            for (sb = 0; sb < frame_subbands; sb++) {
+
+                if (bits[ch][sb] == 0)
+                    continue;
+
+                audio_sample = ((uint64_t) levels[ch][sb] *
+                    (sb_sample_delta[ch][sb] +
+                    frame->sb_sample_f[blk][ch][sb])) >> 32;
+
+                put_bits(&pb, bits[ch][sb], audio_sample);
+            }
+        }
+    }
+
+    flush_put_bits(&pb);
+
+    return (put_bits_count(&pb) + 7) / 8;
+}
+
+/* Write the 3 first header bytes of a regular SBC frame, reject a bitpool
+ * that is too large for the channel mode by returning (size_t) -5, else
+ * return what sbc_pack_frame_internal() returns. */
+static size_t sbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame, int joint)
+{
+    const int frame_subbands  = frame->subbands != 4 ? 8 : 4;
+    const int mono_or_dual    = frame->mode == MONO ||
+                                frame->mode == DUAL_CHANNEL;
+    const int stereo_or_joint = frame->mode == STEREO ||
+                                frame->mode == JOINT_STEREO;
+    uint8_t *header = avpkt->data;
+
+    header[0] = SBC_SYNCWORD;
+    header[1] = (frame->frequency  & 0x03) << 6 |
+                (frame->block_mode & 0x03) << 4 |
+                (frame->mode       & 0x03) << 2 |
+                (frame->allocation & 0x01) << 1;
+    header[2] = frame->bitpool;
+
+    /* the limit is 16 * subbands, or 32 * subbands in the two stereo modes */
+    if (mono_or_dual && frame->bitpool > frame_subbands << 4)
+        return -5;
+    if (stereo_or_joint && frame->bitpool > frame_subbands << 5)
+        return -5;
+
+    if (frame->subbands != 4)
+        header[1] |= 0x01;
+
+    /* the always inlined packer only gets literal subbands / channels counts */
+    if (frame->subbands == 4)
+        return frame->channels == 1
+             ? sbc_pack_frame_internal(avpkt, frame, 4, 1, joint)
+             : sbc_pack_frame_internal(avpkt, frame, 4, 2, joint);
+    return frame->channels == 1
+         ? sbc_pack_frame_internal(avpkt, frame, 8, 1, joint)
+         : sbc_pack_frame_internal(avpkt, frame, 8, 2, joint);
+}
+
+/* mSBC flavour of pack_frame: fixed 3 bytes header, 8 subbands, 1 channel */
+static size_t msbc_pack_frame(AVPacket *avpkt, struct sbc_frame *frame, int joint)
+{
+    uint8_t *header = avpkt->data;
+    header[0] = MSBC_SYNCWORD;
+    header[1] = header[2] = 0;
+    return sbc_pack_frame_internal(avpkt, frame, 8, 1, joint);
+}
+
+/* Clear the input buffers, set the start position and the number of blocks
+ * per analysis call (1 for mSBC, 4 otherwise), then select the DSP functions */
+static void sbc_encoder_init(bool msbc, SBCDSPContext *s,
+                             const struct sbc_frame *frame)
+{
+    /* must be set before ff_sbcdsp_init(), which reads it */
+    s->increment = msbc ? 1 : 4;
+    /* buffer size minus 9 * subbands, rounded down to a multiple of 8 */
+    s->position  = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7;
+    memset(s->X, 0, sizeof(s->X));
+    ff_sbcdsp_init(s);
+}
+
+/* Turn the encoder options into SBC header codes. Returns 0, or
+ * AVERROR(EINVAL) if joint_stereo and dual_channel are both requested. */
+static int sbc_encode_init(AVCodecContext *avctx)
+{
+    SBCEncContext *sbc = avctx->priv_data;
+    int idx;
+
+    if (sbc->joint_stereo && sbc->dual_channel) {
+        av_log(avctx, AV_LOG_ERROR, "joint_stereo and dual_channel "
+                                    "can't be used at the same time.\n");
+        return AVERROR(EINVAL);
+    }
+    sbc->pack_frame = sbc_pack_frame;
+
+    /* a single channel forces mono whatever the stereo options say */
+    sbc->mode = avctx->channels == 1 ? SBC_MODE_MONO         :
+                sbc->joint_stereo    ? SBC_MODE_JOINT_STEREO :
+                sbc->dual_channel    ? SBC_MODE_DUAL_CHANNEL : SBC_MODE_STEREO;
+
+    /* options to header codes: subbands 4/8 -> 0/1, blocks 4..16 -> 0..3 */
+    sbc->subbands >>= 3;
+    sbc->blocks    = (sbc->blocks >> 2) - 1;
+    if (!avctx->frame_size)
+        avctx->frame_size = 4*(sbc->subbands + 1) * 4*(sbc->blocks + 1);
+
+    /* 44.1 kHz code unless the sample rate is in the codec's supported list */
+    sbc->frequency = SBC_FREQ_44100;
+    for (idx = 0; avctx->codec->supported_samplerates[idx]; idx++)
+        if (avctx->sample_rate == avctx->codec->supported_samplerates[idx])
+            sbc->frequency = idx;
+
+    return 0;
+}
+
+/* mSBC uses fixed parameters: SBC_FREQ_16000, mono, 8 subbands, MSBC_BLOCKS
+ * blocks, loudness allocation and a bitpool of 26. Always returns 0. */
+static int msbc_encode_init(AVCodecContext *avctx)
+{
+    SBCEncContext *ctx = avctx->priv_data;
+
+    ctx->pack_frame = msbc_pack_frame;
+    ctx->msbc       = true;
+
+    ctx->mode       = SBC_MODE_MONO;
+    ctx->frequency  = SBC_FREQ_16000;
+    ctx->subbands   = SBC_SB_8;
+    ctx->blocks     = MSBC_BLOCKS;
+    ctx->bitpool    = 26;
+    ctx->allocation = SBC_AM_LOUDNESS;
+
+    if (!avctx->frame_size) avctx->frame_size = 8 * MSBC_BLOCKS;
+    return 0;
+}
+
+/* Returns the output block size in bytes */
+static size_t sbc_get_frame_length(SBCEncContext *sbc)
+{
+    uint8_t nb_subbands, nb_channels, nb_blocks, pool;
+    int payload_bits;
+
+    /* the bitpool did not change since the length was last computed */
+    if (sbc->init && sbc->frame.bitpool == sbc->bitpool)
+        return sbc->frame.length;
+
+    nb_subbands = sbc->subbands ? 8 : 4;
+    nb_channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
+    nb_blocks   = sbc->msbc ? MSBC_BLOCKS : 4 + (sbc->blocks * 4);
+    pool        = sbc->bitpool;
+
+    /* mono and dual channel count the bitpool once per channel */
+    if (nb_channels == 1 || sbc->mode == SBC_MODE_DUAL_CHANNEL)
+        payload_bits = nb_blocks * nb_channels * pool;
+    else if (sbc->mode == SBC_MODE_JOINT_STEREO)
+        payload_bits = nb_subbands + nb_blocks * pool; /* one more bit per subband */
+    else
+        payload_bits = nb_blocks * pool;
+
+    /* 4 bytes, then 4 bits per subband and channel, then the payload which
+     * is not always evenly divisible by 8, so it is rounded up */
+    return 4 + (4 * nb_subbands * nb_channels) / 8 + (payload_bits + 7) / 8;
+}
+
+/* Encode the PCM samples of frame into avpkt. Returns 0 (*got_packet_ptr is
+ * set to 1 if a packet was produced, it is left untouched when the input is
+ * too short for a whole frame) or a negative AVERROR code on failure. */
+static int sbc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{
+    SBCEncContext *sbc = avctx->priv_data;
+    int (*sbc_enc_process_input)(int position,
+            const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+            int nsamples, int nchannels);
+    size_t packed;
+    int ret, joint = 0;
+
+    if (!sbc)
+        return AVERROR(EIO);
+
+    if (!sbc->init) {
+        sbc->frame.frequency = sbc->frequency;
+        sbc->frame.mode = sbc->mode;
+        sbc->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
+        sbc->frame.allocation = sbc->allocation;
+        sbc->frame.subband_mode = sbc->subbands;
+        sbc->frame.subbands = sbc->subbands ? 8 : 4;
+        sbc->frame.block_mode = sbc->blocks;
+        sbc->frame.blocks = sbc->msbc ? MSBC_BLOCKS : 4 + (sbc->blocks * 4);
+        sbc->frame.bitpool = sbc->bitpool;
+        sbc->frame.codesize = sbc->frame.subbands * sbc->frame.blocks
+                              * sbc->frame.channels * 2;
+        sbc->frame.length = sbc_get_frame_length(sbc);
+
+        sbc_encoder_init(sbc->msbc, &sbc->dsp, &sbc->frame);
+        sbc->init = true;
+    } else if (sbc->frame.bitpool != sbc->bitpool) {
+        sbc->frame.length = sbc_get_frame_length(sbc);
+        sbc->frame.bitpool = sbc->bitpool;
+    }
+
+    /* input must be large enough to encode a complete frame */
+    if (frame->nb_samples * sbc->frame.channels * 2 < sbc->frame.codesize)
+        return 0;
+
+    if ((ret = ff_alloc_packet2(avctx, avpkt, sbc->frame.length, 0)) < 0)
+        return ret;
+
+    /* Select the needed input data processing function and call it */
+    sbc_enc_process_input = sbc->frame.subbands == 8
+                          ? sbc->dsp.sbc_enc_process_input_8s
+                          : sbc->dsp.sbc_enc_process_input_4s;
+    sbc->dsp.position = sbc_enc_process_input(
+        sbc->dsp.position, frame->data[0], sbc->dsp.X,
+        sbc->frame.subbands * sbc->frame.blocks, sbc->frame.channels);
+    sbc_analyze_audio(&sbc->dsp, &sbc->frame);
+    if (sbc->frame.mode == JOINT_STEREO)
+        joint = sbc->dsp.sbc_calc_scalefactors_j(
+            sbc->frame.sb_sample_f, sbc->frame.scale_factor,
+            sbc->frame.blocks, sbc->frame.subbands);
+    else
+        sbc->dsp.sbc_calc_scalefactors(
+            sbc->frame.sb_sample_f, sbc->frame.scale_factor,
+            sbc->frame.blocks, sbc->frame.channels,
+            sbc->frame.subbands);
+
+    /* pack_frame reports a failure as a negative value cast to size_t, which
+     * can never be a valid size for data stored in this packet */
+    packed = sbc->pack_frame(avpkt, &sbc->frame, joint);
+    if (packed > (size_t) avpkt->size) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to pack the frame (bitpool %d).\n", sbc->bitpool);
+        return AVERROR(EINVAL);
+    }
+
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+#define OFFSET(x) offsetof(SBCEncContext, x) /* offset of an option's field */
+#define AE (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) /* parenthesized: expands as one operand */
+static const AVOption options[] = { /* referenced by sbc_class.option */
+    { "joint_stereo", "use joint stereo",
+      OFFSET(joint_stereo), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { "dual_channel", "use dual channel",
+      OFFSET(dual_channel), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { "subbands",     "number of subbands (4 or 8)", /* NOTE(review): range also admits 5..7 */
+      OFFSET(subbands),     AV_OPT_TYPE_INT,  { .i64 =  8 }, 4,   8, AE },
+    { "bitpool",      "bitpool value",
+      OFFSET(bitpool),      AV_OPT_TYPE_INT,  { .i64 = 32 }, 0, 255, AE },
+    { "blocks",       "number of blocks (4, 8, 12 or 16)", /* NOTE(review): range admits any 4..16 */
+      OFFSET(blocks),       AV_OPT_TYPE_INT,  { .i64 = 16 }, 4,  16, AE },
+    { "snr",          "use SNR mode (instead of loudness)",
+      OFFSET(allocation),   AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0,   1, AE },
+    { NULL },
+};
+
+static const AVClass sbc_class = { /* used as ff_sbc_encoder.priv_class */
+    .class_name = "sbc encoder",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the option table defined just above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+#if CONFIG_SBC_ENCODER
+AVCodec ff_sbc_encoder = { /* SBC encoder: mono or stereo, S16 samples */
+    .name                  = "sbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_SBC,
+    .priv_data_size        = sizeof(SBCEncContext),
+    .init                  = sbc_encode_init,
+    .encode2               = sbc_encode_frame, /* same callback as ff_msbc_encoder */
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                                  AV_CH_LAYOUT_STEREO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 32000, 44100, 48000, 0 },
+    .priv_class            = &sbc_class, /* attaches options[] to this codec */
+};
+#endif /* CONFIG_SBC_ENCODER */
+
+#if CONFIG_MSBC_ENCODER
+AVCodec ff_msbc_encoder = { /* mSBC encoder: mono, 16 kHz, S16 samples only */
+    .name                  = "msbc",
+    .long_name             = NULL_IF_CONFIG_SMALL("mSBC (wideband speech mono SBC)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_MSBC,
+    .priv_data_size        = sizeof(SBCEncContext),
+    .init                  = msbc_encode_init,
+    .encode2               = sbc_encode_frame, /* same callback as ff_sbc_encoder */
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                             AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]) { 16000, 0 },
+}; /* no .priv_class here, so options[] is not attached to this codec */
+#endif /* CONFIG_MSBC_ENCODER */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a805cd37b4..2350c8bbee 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -63,6 +63,7 @@  OBJS-$(CONFIG_PNG_DECODER)             += x86/pngdsp_init.o
 OBJS-$(CONFIG_PRORES_DECODER)          += x86/proresdsp_init.o
 OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV40_DECODER)            += x86/rv40dsp_init.o
+OBJS-$(CONFIG_SBC_ENCODER)             += x86/sbcdsp_init.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
 OBJS-$(CONFIG_TAK_DECODER)             += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp_init.o
@@ -172,6 +173,7 @@  X86ASM-OBJS-$(CONFIG_PNG_DECODER)      += x86/pngdsp.o
 X86ASM-OBJS-$(CONFIG_PRORES_DECODER)   += x86/proresdsp.o
 X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 X86ASM-OBJS-$(CONFIG_RV40_DECODER)     += x86/rv40dsp.o
+X86ASM-OBJS-$(CONFIG_SBC_ENCODER)      += x86/sbcdsp.o
 X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER)     += x86/svq1enc.o
 X86ASM-OBJS-$(CONFIG_TAK_DECODER)      += x86/takdsp.o
 X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER)   += x86/mlpdsp.o
diff --git a/libavcodec/x86/sbcdsp.asm b/libavcodec/x86/sbcdsp.asm
new file mode 100644
index 0000000000..ecf5298490
--- /dev/null
+++ b/libavcodec/x86/sbcdsp.asm
@@ -0,0 +1,290 @@ 
+;******************************************************************************
+;* SIMD optimized SBC encoder DSP functions
+;*
+;* Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+;* Copyright (C) 2008-2010  Nokia Corporation
+;* Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+;* Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+;* Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+; 0x8000 per dword: rounding term in sbc_analyze_4/8, OR seed in sbc_calc_scalefactors
+scale_mask: times 2 dd 0x8000    ; 1 << (SBC_PROTO_FIXED4_SCALE - 1)
+
+SECTION .text
+
+;*******************************************************************
+;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
+;*******************************************************************
+; Stage 1: four rounded sums of in[0..39] * consts[0..39], narrowed to int16.
+; Stage 2: those sums times consts[40..55] -> out[0..3] (int32).
+; NOTE(review): no emms is issued here; presumably the caller does it - confirm.
+INIT_MMX mmx
+cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
+    movq          m0, [inq]
+    movq          m1, [inq+8]
+    pmaddwd       m0, [constsq]       ; 4 int16 products added pairwise -> 2 int32
+    pmaddwd       m1, [constsq+8]
+    paddd         m0, [scale_mask]    ; rounding term for the psrad by 16 below
+    paddd         m1, [scale_mask]
+
+    movq          m2, [inq+16]
+    movq          m3, [inq+24]
+    pmaddwd       m2, [constsq+16]
+    pmaddwd       m3, [constsq+24]
+    paddd         m0, m2
+    paddd         m1, m3
+
+    movq          m2, [inq+32]
+    movq          m3, [inq+40]
+    pmaddwd       m2, [constsq+32]
+    pmaddwd       m3, [constsq+40]
+    paddd         m0, m2
+    paddd         m1, m3
+
+    movq          m2, [inq+48]
+    movq          m3, [inq+56]
+    pmaddwd       m2, [constsq+48]
+    pmaddwd       m3, [constsq+56]
+    paddd         m0, m2
+    paddd         m1, m3
+
+    movq          m2, [inq+64]
+    movq          m3, [inq+72]
+    pmaddwd       m2, [constsq+64]
+    pmaddwd       m3, [constsq+72]
+    paddd         m0, m2
+    paddd         m1, m3
+
+    psrad         m0, 16    ; SBC_PROTO_FIXED4_SCALE
+    psrad         m1, 16    ; SBC_PROTO_FIXED4_SCALE
+    packssdw      m0, m0    ; saturate to int16; the pair is duplicated in m0
+    packssdw      m1, m1
+
+    movq          m2, m0
+    pmaddwd       m0, [constsq+80]
+    pmaddwd       m2, [constsq+88]
+    movq          m3, m1
+    pmaddwd       m1, [constsq+96]
+    pmaddwd       m3, [constsq+104]
+    paddd         m0, m1    ; m0 = out[0..1]
+    paddd         m2, m3    ; m2 = out[2..3]
+    movq          [outq  ], m0
+    movq          [outq+8], m2
+    RET
+
+
+
+;*******************************************************************
+;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
+;*******************************************************************
+; Stage 1: eight rounded sums of in[0..79] * consts[0..79], narrowed to int16.
+; Stage 2: those sums times consts[80..143] -> out[0..7] (int32).
+; NOTE(review): no emms is issued here; presumably the caller does it - confirm.
+INIT_MMX mmx
+cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
+    movq          m0, [inq]
+    movq          m1, [inq+8]
+    movq          m2, [inq+16]
+    movq          m3, [inq+24]
+    pmaddwd       m0, [constsq]       ; 4 int16 products added pairwise -> 2 int32
+    pmaddwd       m1, [constsq+8]
+    pmaddwd       m2, [constsq+16]
+    pmaddwd       m3, [constsq+24]
+    paddd         m0, [scale_mask]    ; rounding term for the psrad by 16 below
+    paddd         m1, [scale_mask]
+    paddd         m2, [scale_mask]
+    paddd         m3, [scale_mask]
+
+    movq          m4, [inq+32]
+    movq          m5, [inq+40]
+    movq          m6, [inq+48]
+    movq          m7, [inq+56]
+    pmaddwd       m4, [constsq+32]
+    pmaddwd       m5, [constsq+40]
+    pmaddwd       m6, [constsq+48]
+    pmaddwd       m7, [constsq+56]
+    paddd         m0, m4
+    paddd         m1, m5
+    paddd         m2, m6
+    paddd         m3, m7
+
+    movq          m4, [inq+64]
+    movq          m5, [inq+72]
+    movq          m6, [inq+80]
+    movq          m7, [inq+88]
+    pmaddwd       m4, [constsq+64]
+    pmaddwd       m5, [constsq+72]
+    pmaddwd       m6, [constsq+80]
+    pmaddwd       m7, [constsq+88]
+    paddd         m0, m4
+    paddd         m1, m5
+    paddd         m2, m6
+    paddd         m3, m7
+
+    movq          m4, [inq+96]
+    movq          m5, [inq+104]
+    movq          m6, [inq+112]
+    movq          m7, [inq+120]
+    pmaddwd       m4, [constsq+96]
+    pmaddwd       m5, [constsq+104]
+    pmaddwd       m6, [constsq+112]
+    pmaddwd       m7, [constsq+120]
+    paddd         m0, m4
+    paddd         m1, m5
+    paddd         m2, m6
+    paddd         m3, m7
+
+    movq          m4, [inq+128]
+    movq          m5, [inq+136]
+    movq          m6, [inq+144]
+    movq          m7, [inq+152]
+    pmaddwd       m4, [constsq+128]
+    pmaddwd       m5, [constsq+136]
+    pmaddwd       m6, [constsq+144]
+    pmaddwd       m7, [constsq+152]
+    paddd         m0, m4
+    paddd         m1, m5
+    paddd         m2, m6
+    paddd         m3, m7
+
+    psrad         m0, 16    ; SBC_PROTO_FIXED8_SCALE
+    psrad         m1, 16    ; SBC_PROTO_FIXED8_SCALE
+    psrad         m2, 16    ; SBC_PROTO_FIXED8_SCALE
+    psrad         m3, 16    ; SBC_PROTO_FIXED8_SCALE
+    packssdw      m0, m0    ; saturate to int16; the pair is duplicated in m0
+    packssdw      m1, m1
+    packssdw      m2, m2
+    packssdw      m3, m3
+
+    movq          m4, m0
+    movq          m5, m0
+    pmaddwd       m4, [constsq+160]
+    pmaddwd       m5, [constsq+168]
+
+    movq          m6, m1
+    movq          m7, m1
+    pmaddwd       m6, [constsq+192]
+    pmaddwd       m7, [constsq+200]
+    paddd         m4, m6
+    paddd         m5, m7
+
+    movq          m6, m2
+    movq          m7, m2
+    pmaddwd       m6, [constsq+224]
+    pmaddwd       m7, [constsq+232]
+    paddd         m4, m6
+    paddd         m5, m7
+
+    movq          m6, m3
+    movq          m7, m3
+    pmaddwd       m6, [constsq+256]
+    pmaddwd       m7, [constsq+264]
+    paddd         m4, m6
+    paddd         m5, m7
+
+    movq          [outq  ], m4    ; out[0..1]
+    movq          [outq+8], m5    ; out[2..3]
+
+    movq          m5, m0
+    pmaddwd       m0, [constsq+176]
+    pmaddwd       m5, [constsq+184]
+
+    movq          m7, m1
+    pmaddwd       m1, [constsq+208]
+    pmaddwd       m7, [constsq+216]
+    paddd         m0, m1
+    paddd         m5, m7
+
+    movq          m7, m2
+    pmaddwd       m2, [constsq+240]
+    pmaddwd       m7, [constsq+248]
+    paddd         m0, m2
+    paddd         m5, m7
+
+    movq          m7, m3
+    pmaddwd       m3, [constsq+272]
+    pmaddwd       m7, [constsq+280]
+    paddd         m0, m3
+    paddd         m5, m7
+    movq          [outq+16], m0    ; out[4..5]
+    movq          [outq+24], m5    ; out[6..7]
+    RET
+
+
+;*******************************************************************
+;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
+;                              uint32_t scale_factor[2][8],
+;                              int blocks, int channels, int subbands)
+;*******************************************************************
+; For each channel and subband: OR (|sample| - 1) over all blocks into a value
+; seeded with 1 << 15, then store the index of its highest set bit minus 15.
+; The ten names below occupy r0-r9, so 10 GPRs must be requested: with 9, blk
+; (r9, callee-saved rbx on x86-64) would be clobbered without being saved.
+; NOTE(review): x86inc offers only 7 GPRs on x86-32 - confirm it builds there.
+INIT_MMX mmx
+cglobal sbc_calc_scalefactors, 5, 10, 3, sb_sample_f, scale_factor, blocks, channels, subbands, ch, sb, sa, sf, blk
+    shl           channelsd, 5    ; channels * 32: byte bound of the ch loop
+    mov           chq, 0
+.loop_1:
+    lea           saq, [sb_sample_fq + chq]
+    lea           sfq, [scale_factorq + chq]
+    mov           sbd, 0
+.loop_2:
+    lea           blkq, [blocksq - 1]    ; blk = (blocks - 1) * 64;
+    shl           blkd, 6
+    movq          m0, [scale_mask]    ; seed, keeps the bsr input non-zero
+.loop_3:
+    movq          m1, [saq+blkq]
+    pxor          m2, m2
+    pcmpgtd       m1, m2              ; -1 in lanes where sample > 0
+    paddd         m1, [saq+blkq]      ; sample - 1 if positive, else sample
+    pcmpgtd       m2, m1              ; -1 in lanes where that is negative
+    pxor          m1, m2              ; |sample| - 1, or 0 for a zero sample
+    por           m0, m1
+
+    sub           blkd, 64            ; step back one block of 2 * 8 int32
+    jns           .loop_3
+
+    movd          blkd, m0
+    psrlq         m0,   32
+    bsr           blkd, blkd
+    sub           blkd, 15    ; SCALE_OUT_BITS
+    mov           [sfq], blkd
+
+    movd          blkd, m0
+    bsr           blkd, blkd
+    sub           blkd, 15    ; SCALE_OUT_BITS
+    mov           [sfq+4], blkd
+
+    add           saq, 8
+    add           sfq, 8
+    add           sbd, 2                ; two subbands per iteration
+    cmp           sbd, subbandsd
+    jl            .loop_2
+
+    add           chd, 32
+    cmp           chd, channelsd
+    jl            .loop_1
+
+    emms
+    RET
diff --git a/libavcodec/x86/sbcdsp_init.c b/libavcodec/x86/sbcdsp_init.c
new file mode 100644
index 0000000000..86effecfdf
--- /dev/null
+++ b/libavcodec/x86/sbcdsp_init.c
@@ -0,0 +1,51 @@ 
+/*
+ * Bluetooth low-complexity, subband codec (SBC)
+ *
+ * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2008-2010  Nokia Corporation
+ * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SBC MMX optimization for some basic "building bricks"
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/sbcdsp.h"
+
+void ff_sbc_analyze_4_mmx(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_analyze_8_mmx(const int16_t *in, int32_t *out, const int16_t *consts);
+void ff_sbc_calc_scalefactors_mmx(int32_t sb_sample_f[16][2][8],
+                                  uint32_t scale_factor[2][8],
+                                  int blocks, int channels, int subbands);
+
+av_cold void ff_sbcdsp_init_x86(SBCDSPContext *s)
+{
+    const int cpu = av_get_cpu_flags();
+    /* leave *s untouched unless the external MMX routines can be used */
+    if (!EXTERNAL_MMX(cpu))
+        return;
+    s->sbc_analyze_4         = ff_sbc_analyze_4_mmx;
+    s->sbc_analyze_8         = ff_sbc_analyze_8_mmx;
+    s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_mmx;
+}