[FFmpeg-devel,1/2] aptx: implement the aptX bluetooth codec

Message ID	20171105233918.16186-2-aurel@gnuage.org
State	Superseded
Headers	show Delivered-To: ffmpegpatchwork@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Aurelien Jacobs <aurel@gnuage.org> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Date: Mon, 6 Nov 2017 00:39:17 +0100 Message-Id: <20171105233918.16186-2-aurel@gnuage.org> In-Reply-To: <20171105233918.16186-1-aurel@gnuage.org> References: <20171105233918.16186-1-aurel@gnuage.org> Subject: [FFmpeg-devel] [PATCH 1/2] aptx: implement the aptX bluetooth codec Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Aurelien Jacobs <aurel@gnuage.org> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

On Mon, Nov 06, 2017 at 12:30:02AM +0000, Rostislav Pehlivanov wrote: > On 5 November 2017 at 23:39, Aurelien Jacobs <aurel@gnuage.org> wrote: > > > The encoder was reverse engineered from binary library and from > > EP0398973B1 patent (long expired). > > The decoder was simply deduced from the encoder. > > --- > > doc/general.texi | 2 + > > libavcodec/Makefile | 2 + > > libavcodec/allcodecs.c | 1 + > > libavcodec/aptx.c | 854 ++++++++++++++++++++++++++++++ > > ++++++++++++++++++ > > libavcodec/avcodec.h | 1 + > > libavcodec/codec_desc.c | 7 + > > 6 files changed, 867 insertions(+) > > create mode 100644 libavcodec/aptx.c > > > > Very nice job Thanks ! > > + > > +static const int32_t quantization_factors[32] = { > > + 2048 << 11, > > + 2093 << 11, > > + 2139 << 11, > > + 2186 << 11, > > + 2233 << 11, > > + 2282 << 11, > > + 2332 << 11, > > + 2383 << 11, > > + 2435 << 11, > > + 2489 << 11, > > + 2543 << 11, > > + 2599 << 11, > > + 2656 << 11, > > + 2714 << 11, > > + 2774 << 11, > > + 2834 << 11, > > + 2896 << 11, > > + 2960 << 11, > > + 3025 << 11, > > + 3091 << 11, > > + 3158 << 11, > > + 3228 << 11, > > + 3298 << 11, > > + 3371 << 11, > > + 3444 << 11, > > + 3520 << 11, > > + 3597 << 11, > > + 3676 << 11, > > + 3756 << 11, > > + 3838 << 11, > > + 3922 << 11, > > + 4008 << 11, > > +}; > > > > > First of all, please put all numbers on the same line. > Second of all, its pointless to do a shift here, either change the numbers > or better yet, since you already do a shift down: > > > > > + /* update quantization factor */ > > + idx = (invert_quantize->factor_select & 0xFF) >> 3; > > + shift -= invert_quantize->factor_select >> 8; > > + invert_quantize->quantization_factor = quantization_factors[idx] >> > > shift; > > +} > > > > > Which would be equivalent to: > > idx = (invert_quantize->factor_select & 0xFF) >> 3; > > shift -= invert_quantize->factor_select >> 8; > > invert_quantize->quantization_factor = (quantization_factors[idx] << 11) > > >> shift; > > > > The compiler ought to be smart enough to handle that as a single operation. I don't think the compiler will optimze as much, but that's a trivial operation, and there is no mesurable difference, so indeed, I moved the shift out of the table and cleaned up the formatting. > > +static int##size##_t rshift##size(int##size##_t value, int shift) > > \ > > > > +static int##size##_t rshift##size##_clip24(int##size##_t value, int > > shift) \ > > > > +static void aptx_update_codeword_history(Channel *channel) > > > > +static void aptx_generate_dither(Channel *channel) > > > > > +static void aptx_qmf_filter_signal_push(FilterSignal *signal, int32_t > > sample) > > > > +static int32_t aptx_qmf_convolution(FilterSignal *signal, > > + const int32_t coeffs[FILTER_TAPS], > > + int shift) > > > > +static void aptx_qmf_polyphase_analysis(FilterSignal signal[NB_FILTERS], > > + const int32_t > > coeffs[NB_FILTERS][FILTER_TAPS], > > + int shift, > > + int32_t samples[NB_FILTERS], > > + int32_t *low_subband_output, > > + int32_t *high_subband_output) > > + > > > > > Add an inline flag to small functions like these. Won't make a difference > but eh. Done. > > + > > +static void aptx_quantise_difference(Quantize *quantize, > > + int32_t sample_difference, > > + int32_t dither, > > + int32_t quantization_factor, > > + ConstTables *tables) > > > > English spelling of quantize? I prefer quantize since that's how its > spelled throughout the entire codebase. Good catch. Fixed. > > +{ > > + const int32_t *intervals = tables->quantize_intervals; > > + int32_t quantized_sample, dithered_sample, parity_change; > > + int32_t d, mean, interval, inv; > > + int64_t error; > > + > > + quantized_sample = aptx_bin_search(FFABS(sample_difference) >> 4, > > + quantization_factor, > > + intervals, tables->tables_size); > > + > > + d = rshift32_clip24(MULH(dither, dither), 7) - (1 << 23); > > + d = rshift64(MUL64(d, tables->quantize_dither_factors[quantized_sample]), > > 23); > > + > > + intervals += quantized_sample; > > + mean = (intervals[1] + intervals[0]) / 2; > > + interval = intervals[1] - intervals[0]; > > + if (sample_difference < 0) > > + interval = -interval; > > > > > Can be simplified to: > interval *= 1 - 2*(sample_difference < 0); > or > interval *= sample_difference < 0 ? -1 : +1; I prefer: interval *= -(sample_difference < 0) | 1; > > + > > + dithered_sample = rshift64_clip24(MUL64(dither, interval) + > > ((int64_t)(mean + d) << 32), 32); > > + error = ((int64_t)FFABS(sample_difference) << 20) - > > MUL64(dithered_sample, quantization_factor); > > + quantize->error = FFABS(rshift64(error, 23)); > > + > > + parity_change = quantized_sample; > > + if (error < 0) quantized_sample--; > > + else parity_change--; > > > + > > > > Coding style issues, seen this in much of the code, must be > if (something) > stuff; > else > other_stuff; OK. > > + inv = -(sample_difference < 0); > > + quantize->quantized_sample = quantized_sample ^ inv; > > + quantize->quantized_sample_parity_change = parity_change ^ inv; > > > > That's nasty. You invert the sign and at the same time you increment by one > and you decrement (before that). That's indeed nasty. > I'm not quite sure but I think this might be an error. This is in the > encoder section too, so I'm wondering, is this what the spec says to do > (most codecs only specify the decoder except for old audio/speech ones like > this one). It would be awsome to have the spec, but unfortunatly the best we have is reverse engineering... But I double checked what the binary library does and I can assure you that this is not an error. My encoder produce a bit exact identical output as the binary library. > +} > > + > > +static int aptx_decode_frame(AVCodecContext *avctx, void *data, > > + int *got_frame_ptr, AVPacket *avpkt) > > +{ > > + AptXContext *s = avctx->priv_data; > > + AVFrame *frame = data; > > + const uint8_t *buf = avpkt->data; > > + int len = avpkt->size; > > + int16_t *ptr; > > + int ret; > > + > > + if (len < 4) { > > + av_log(avctx, AV_LOG_ERROR, "Packet is too small\n"); > > + return AVERROR_INVALIDDATA; > > + } > > + > > + /* get output buffer */ > > + frame->nb_samples = len & ~3; > > + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) > > + return ret; > > + ptr = (int16_t *)frame->data[0]; > > > > No need to cast, nor to define the ptr as int16_t OK. > > + > > + while (len >= 4) { > > + int32_t samples[NB_CHANNELS][4]; > > + > > + if (aptx_decode_samples(s, buf, samples)) > > + av_log(avctx, AV_LOG_ERROR, "Synchronization error\n"); > > > > Return AVERROR_INVALIDDDATA? That's probably better indeed, and that will allow a bluetooth manager to detect stream corruption and to re-initialize the radio link. > > + > > > > > > + for (int i = 0; i < 4; i++) { > > + /* convert 24 bits planar samples to 16 bits interleaved > > output */ > > + *ptr++ = samples[LEFT ][i] >> 8; > > + *ptr++ = samples[RIGHT][i] >> 8; > > > > How horrible, don't interleave the samples, leave them as planar. > Change the output format in AVCodec and use > > AV_WN16(frame->data[<channel>][<sample>], samples[<channel>][<sample>] >> > 8); > > To write the data. No point to convert to interleaved when the data's > planar. The data has to be shifted anyway, so the interleaving is done for free at the same time. But I get it that it is not very elegant. Changed to use native planar format. > > > > + > > + *got_frame_ptr = 1; > > + return avpkt->size - len; > > > > ? > Decoders should return the number of bytes read from the packet That's exactly what it does. len represent the size of the input data that has not yet been consumed by the decoder. Anyway, I've simplified this while swtiching to native planar format. > > +} > > + > > +static int aptx_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, > > + const AVFrame *frame, int *got_packet_ptr) > > +{ > > + AptXContext *s = avctx->priv_data; > > + int16_t *ptr = (int16_t *)frame->data[0]; > > + int32_t samples[NB_CHANNELS][4]; > > + int ret; > > + > > + /* input must contain a multiple of 4 samples */ > > + if (frame->nb_samples & 3 || frame->nb_samples == 0) { > > + av_log(avctx, AV_LOG_ERROR, "Frame must have a multiple of 4 > > samples\n"); > > + return 0; > > + } > > + > > + if ((ret = ff_alloc_packet2(avctx, avpkt, frame->nb_samples, 0)) < 0) > > + return ret; > > + > > + for (int pos = 0; pos < frame->nb_samples; pos += 4) { > > + for (int i = 0; i < 4; i++) { > > + /* convert 16 bits interleaved input to 24 bits planar > > samples */ > > + samples[LEFT][i] = ptr[LEFT ] << 8; > > + samples[RIGHT][i] = ptr[RIGHT] << 8; > > + ptr += NB_CHANNELS; > > + } > > > > Once again use planar sample fmt and then > AV_RN16(&frame->data[<channel>][<sample_offset>]) to read them. Done. > > + > > + aptx_encode_samples(s, samples, avpkt->data + pos); > > + } > > + > > + *got_packet_ptr = 1; > > + return 0; > > +} > > + > > + > > +#if CONFIG_APTX_DECODER > > +AVCodec ff_aptx_decoder = { > > + .name = "aptx", > > + .long_name = NULL_IF_CONFIG_SMALL("aptX (Audio Processing > > Technology for Bluetooth)"), > > + .type = AVMEDIA_TYPE_AUDIO, > > + .id = AV_CODEC_ID_APTX, > > + .priv_data_size = sizeof(AptXContext), > > + .init = aptx_init, > > + .decode = aptx_decode_frame, > > + .capabilities = AV_CODEC_CAP_DR1, > > + .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_STEREO, 0}, > > + .sample_fmts = (const enum AVSampleFormat[]) { > > AV_SAMPLE_FMT_S16, > > > > Change to AV_SAMPLE_FMT_S16P Actually changed to AV_SAMPLE_FMT_S32P as suggested. > > + > > AV_SAMPLE_FMT_NONE }, > > +}; > > +#endif > > + > > +#if CONFIG_APTX_ENCODER > > +AVCodec ff_aptx_encoder = { > > + .name = "aptx", > > + .long_name = NULL_IF_CONFIG_SMALL("aptX (Audio Processing > > Technology for Bluetooth)"), > > + .type = AVMEDIA_TYPE_AUDIO, > > + .id = AV_CODEC_ID_APTX, > > + .priv_data_size = sizeof(AptXContext), > > + .init = aptx_init, > > + .encode2 = aptx_encode_frame, > > + .capabilities = AV_CODEC_CAP_VARIABLE_FRAME_SIZE, > > + .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_STEREO, 0}, > > + .sample_fmts = (const enum AVSampleFormat[]) { > > AV_SAMPLE_FMT_S16, > > > > And here to AV_SAMPLE_FMT_S16P Same here. > Apart from that, doesn't look too bad. Thanks a lot for the review. I will submit updated patch set.

diff --git a/doc/general.texi b/doc/general.texi index 9e6ae13435..4a89531c47 100644 --- a/doc/general.texi +++ b/doc/general.texi @@ -991,6 +991,8 @@ following image formats are supported: @item Amazing Studio PAF Audio @tab @tab X @item Apple lossless audio @tab X @tab X @tab QuickTime fourcc 'alac' +@item aptX @tab X @tab X + @tab Used in Bluetooth A2DP @item ATRAC1 @tab @tab X @item ATRAC3 @tab @tab X @item ATRAC3+ @tab @tab X diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3a33361f33..25706a263d 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -188,6 +188,8 @@ OBJS-$(CONFIG_AMV_ENCODER) += mjpegenc.o mjpegenc_common.o \ OBJS-$(CONFIG_ANM_DECODER) += anm.o OBJS-$(CONFIG_ANSI_DECODER) += ansi.o cga_data.o OBJS-$(CONFIG_APE_DECODER) += apedec.o +OBJS-$(CONFIG_APTX_DECODER) += aptx.o +OBJS-$(CONFIG_APTX_ENCODER) += aptx.o OBJS-$(CONFIG_APNG_DECODER) += png.o pngdec.o pngdsp.o OBJS-$(CONFIG_APNG_ENCODER) += png.o pngenc.o OBJS-$(CONFIG_SSA_DECODER) += assdec.o ass.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 98655ddd7c..61abe9939c 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -406,6 +406,7 @@ static void register_all(void) REGISTER_DECODER(AMRNB, amrnb); REGISTER_DECODER(AMRWB, amrwb); REGISTER_DECODER(APE, ape); + REGISTER_ENCDEC (APTX, aptx); REGISTER_DECODER(ATRAC1, atrac1); REGISTER_DECODER(ATRAC3, atrac3); REGISTER_DECODER(ATRAC3AL, atrac3al); diff --git a/libavcodec/aptx.c b/libavcodec/aptx.c new file mode 100644 index 0000000000..4ecb6bb231 --- /dev/null +++ b/libavcodec/aptx.c @@ -0,0 +1,854 @@ +/* + * Audio Processing Technology codec for Bluetooth (aptX) + * + * Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/intreadwrite.h" +#include "avcodec.h" +#include "internal.h" +#include "mathops.h" + + +enum channels { + LEFT, + RIGHT, + NB_CHANNELS +}; + +enum subbands { + LF, // Low Frequency (0-5.5 kHz) + MLF, // Medium-Low Frequency (5.5-11kHz) + MHF, // Medium-High Frequency (11-16.5kHz) + HF, // High Frequency (16.5-22kHz) + NB_SUBBANDS +}; + +#define NB_FILTERS 2 +#define FILTER_TAPS 16 + +typedef struct { + int pos; + int32_t buffer[2*FILTER_TAPS]; +} FilterSignal; + +typedef struct { + FilterSignal outer_filter_signal[NB_FILTERS]; + FilterSignal inner_filter_signal[NB_FILTERS][NB_FILTERS]; +} QMFAnalysis; + +typedef struct { + int32_t quantized_sample; + int32_t quantized_sample_parity_change; + int32_t error; +} Quantize; + +typedef struct { + int32_t quantization_factor; + int32_t factor_select; + int32_t reconstructed_difference; +} InvertQuantize; + +typedef struct { + int32_t prev_sign[2]; + int32_t s_weight[2]; + int32_t d_weight[24]; + int32_t pos; + int32_t reconstructed_differences[48]; + int32_t previous_reconstructed_sample; + int32_t predicted_difference; + int32_t predicted_sample; +} Prediction; + +typedef struct { + int32_t codeword_history; + int32_t dither_parity; + int32_t dither[NB_SUBBANDS]; + + QMFAnalysis qmf; + Quantize quantize[NB_SUBBANDS]; + InvertQuantize invert_quantize[NB_SUBBANDS]; + Prediction prediction[NB_SUBBANDS]; +} Channel; + +typedef struct { + int32_t sync_idx; + Channel channels[NB_CHANNELS]; +} AptXContext; + + +static const int32_t quantize_intervals_LF[65] = { + -9948, 9948, 29860, 49808, 69822, 89926, 110144, 130502, + 151026, 171738, 192666, 213832, 235264, 256982, 279014, 301384, + 324118, 347244, 370790, 394782, 419250, 444226, 469742, 495832, + 522536, 549890, 577936, 606720, 636290, 666700, 698006, 730270, + 763562, 797958, 833538, 870398, 908640, 948376, 989740, 1032874, + 1077948, 1125150, 1174700, 1226850, 1281900, 1340196, 1402156, 1468282, + 1539182, 1615610, 1698514, 1789098, 1888944, 2000168, 2125700, 2269750, + 2438670, 2642660, 2899462, 3243240, 3746078, 4535138, 5664098, 7102424, + 8897462, +}; +static const int32_t invert_quantize_dither_factors_LF[65] = { + 9948, 9948, 9962, 9988, 10026, 10078, 10142, 10218, + 10306, 10408, 10520, 10646, 10784, 10934, 11098, 11274, + 11462, 11664, 11880, 12112, 12358, 12618, 12898, 13194, + 13510, 13844, 14202, 14582, 14988, 15422, 15884, 16380, + 16912, 17484, 18098, 18762, 19480, 20258, 21106, 22030, + 23044, 24158, 25390, 26760, 28290, 30008, 31954, 34172, + 36728, 39700, 43202, 47382, 52462, 58762, 66770, 77280, + 91642, 112348, 144452, 199326, 303512, 485546, 643414, 794914, + 1000124, +}; +static const int32_t quantize_dither_factors_LF[65] = { + 0, 4, 7, 10, 13, 16, 19, 22, + 26, 28, 32, 35, 38, 41, 44, 47, + 51, 54, 58, 62, 65, 70, 74, 79, + 84, 90, 95, 102, 109, 116, 124, 133, + 143, 154, 166, 180, 195, 212, 231, 254, + 279, 308, 343, 383, 430, 487, 555, 639, + 743, 876, 1045, 1270, 1575, 2002, 2628, 3591, + 5177, 8026, 13719, 26047, 45509, 39467, 37875, 51303, + 0, +}; +static const int32_t quantize_factor_select_offset_LF[65] = { + 0, -21, -19, -17, -15, -12, -10, -8, + -6, -4, -1, 1, 3, 6, 8, 10, + 13, 15, 18, 20, 23, 26, 29, 31, + 34, 37, 40, 43, 47, 50, 53, 57, + 60, 64, 68, 72, 76, 80, 85, 89, + 94, 99, 105, 110, 116, 123, 129, 136, + 144, 152, 161, 171, 182, 194, 207, 223, + 241, 263, 291, 328, 382, 467, 522, 522, + 522, +}; + + +static const int32_t quantize_intervals_MLF[9] = { + -89806, 89806, 278502, 494338, 759442, 1113112, 1652322, 2720256, 5190186, +}; +static const int32_t invert_quantize_dither_factors_MLF[9] = { + 89806, 89806, 98890, 116946, 148158, 205512, 333698, 734236, 1735696, +}; +static const int32_t quantize_dither_factors_MLF[9] = { + 0, 2271, 4514, 7803, 14339, 32047, 100135, 250365, 0, +}; +static const int32_t quantize_factor_select_offset_MLF[9] = { + 0, -14, 6, 29, 58, 96, 154, 270, 521, +}; + + +static const int32_t quantize_intervals_MHF[3] = { + -194080, 194080, 890562, +}; +static const int32_t invert_quantize_dither_factors_MHF[3] = { + 194080, 194080, 502402, +}; +static const int32_t quantize_dither_factors_MHF[3] = { + 0, 77081, 0, +}; +static const int32_t quantize_factor_select_offset_MHF[3] = { + 0, -33, 136, +}; + + +static const int32_t quantize_intervals_HF[5] = { + -163006, 163006, 542708, 1120554, 2669238, +}; +static const int32_t invert_quantize_dither_factors_HF[5] = { + 163006, 163006, 216698, 361148, 1187538, +}; +static const int32_t quantize_dither_factors_HF[5] = { + 0, 13423, 36113, 206598, 0, +}; +static const int32_t quantize_factor_select_offset_HF[5] = { + 0, -8, 33, 95, 262, +}; + +typedef const struct { + const int32_t *quantize_intervals; + const int32_t *invert_quantize_dither_factors; + const int32_t *quantize_dither_factors; + const int32_t *quantize_factor_select_offset; + int tables_size; + int32_t quantized_bits; + int32_t prediction_order; +} ConstTables; + +static ConstTables tables[NB_SUBBANDS] = { + [LF] = { quantize_intervals_LF, + invert_quantize_dither_factors_LF, + quantize_dither_factors_LF, + quantize_factor_select_offset_LF, + FF_ARRAY_ELEMS(quantize_intervals_LF), + 7, 24 }, + [MLF] = { quantize_intervals_MLF, + invert_quantize_dither_factors_MLF, + quantize_dither_factors_MLF, + quantize_factor_select_offset_MLF, + FF_ARRAY_ELEMS(quantize_intervals_MLF), + 4, 12 }, + [MHF] = { quantize_intervals_MHF, + invert_quantize_dither_factors_MHF, + quantize_dither_factors_MHF, + quantize_factor_select_offset_MHF, + FF_ARRAY_ELEMS(quantize_intervals_MHF), + 2, 6 }, + [HF] = { quantize_intervals_HF, + invert_quantize_dither_factors_HF, + quantize_dither_factors_HF, + quantize_factor_select_offset_HF, + FF_ARRAY_ELEMS(quantize_intervals_HF), + 3, 12 }, +}; + +static const int32_t quantization_factors[32] = { + 2048 << 11, + 2093 << 11, + 2139 << 11, + 2186 << 11, + 2233 << 11, + 2282 << 11, + 2332 << 11, + 2383 << 11, + 2435 << 11, + 2489 << 11, + 2543 << 11, + 2599 << 11, + 2656 << 11, + 2714 << 11, + 2774 << 11, + 2834 << 11, + 2896 << 11, + 2960 << 11, + 3025 << 11, + 3091 << 11, + 3158 << 11, + 3228 << 11, + 3298 << 11, + 3371 << 11, + 3444 << 11, + 3520 << 11, + 3597 << 11, + 3676 << 11, + 3756 << 11, + 3838 << 11, + 3922 << 11, + 4008 << 11, +}; + + +/* Rounded right shift with optionnal clipping */ +#define RSHIFT_SIZE(size) \ +static int##size##_t rshift##size(int##size##_t value, int shift) \ +{ \ + int##size##_t rounding = (int##size##_t)1 << (shift - 1); \ + int##size##_t mask = ((int##size##_t)1 << (shift + 1)) - 1; \ + return ((value + rounding) >> shift) - ((value & mask) == rounding); \ +} \ +static int##size##_t rshift##size##_clip24(int##size##_t value, int shift) \ +{ \ + return av_clip_intp2(rshift##size(value, shift), 23); \ +} +RSHIFT_SIZE(32) +RSHIFT_SIZE(64) + + +static void aptx_update_codeword_history(Channel *channel) +{ + int32_t cw = ((channel->quantize[0].quantized_sample & 3) << 0) + + ((channel->quantize[1].quantized_sample & 2) << 1) + + ((channel->quantize[2].quantized_sample & 1) << 3); + channel->codeword_history = (cw << 8) + (channel->codeword_history << 4); +} + +static void aptx_generate_dither(Channel *channel) +{ + int64_t m; + int32_t d; + + aptx_update_codeword_history(channel); + + m = (int64_t)5184443 * (channel->codeword_history >> 7); + d = (m << 2) + (m >> 22); + for (int subband = 0; subband < NB_SUBBANDS; subband++) + channel->dither[subband] = d << (23 - 5*subband); + channel->dither_parity = (d >> 25) & 1; +} + +/* + * Convolution filter coefficients for the outer QMF of the QMF tree. + * The 2 sets are a mirror of each other. + */ +static const int32_t aptx_qmf_outer_coeffs[NB_FILTERS][FILTER_TAPS] = { + { + 730, -413, -9611, 43626, -121026, 269973, -585547, 2801966, + 697128, -160481, 27611, 8478, -10043, 3511, 688, -897, + }, + { + -897, 688, 3511, -10043, 8478, 27611, -160481, 697128, + 2801966, -585547, 269973, -121026, 43626, -9611, -413, 730, + }, +}; + +/* + * Convolution filter coefficients for the inner QMF of the QMF tree. + * The 2 sets are a mirror of each other. + */ +static const int32_t aptx_qmf_inner_coeffs[NB_FILTERS][FILTER_TAPS] = { + { + 1033, -584, -13592, 61697, -171156, 381799, -828088, 3962579, + 985888, -226954, 39048, 11990, -14203, 4966, 973, -1268, + }, + { + -1268, 973, 4966, -14203, 11990, 39048, -226954, 985888, + 3962579, -828088, 381799, -171156, 61697, -13592, -584, 1033, + }, +}; + +/* + * Push one sample into a circular signal buffer. + */ +static void aptx_qmf_filter_signal_push(FilterSignal *signal, int32_t sample) +{ + signal->buffer[signal->pos ] = sample; + signal->buffer[signal->pos+FILTER_TAPS] = sample; + signal->pos = (signal->pos + 1) % FILTER_TAPS; +} + +/* + * Compute the convolution of the signal with the coefficients, and reduce + * to 24 bits by applying the specified right shifting. + */ +static int32_t aptx_qmf_convolution(FilterSignal *signal, + const int32_t coeffs[FILTER_TAPS], + int shift) +{ + int32_t *sig = &signal->buffer[signal->pos]; + int64_t e = 0; + + for (int i = 0; i < FILTER_TAPS; i++) + e += MUL64(sig[i], coeffs[i]); + + return rshift64_clip24(e, shift); +} + +/* + * Half-band QMF analysis filter realized with a polyphase FIR filter. + * Split into 2 subbands and downsample by 2. + * So for each pair of samples that goes in, one sample goes out, + * split into 2 separate subbands. + */ +static void aptx_qmf_polyphase_analysis(FilterSignal signal[NB_FILTERS], + const int32_t coeffs[NB_FILTERS][FILTER_TAPS], + int shift, + int32_t samples[NB_FILTERS], + int32_t *low_subband_output, + int32_t *high_subband_output) +{ + int32_t subbands[NB_FILTERS]; + + for (int i = 0; i < NB_FILTERS; i++) { + aptx_qmf_filter_signal_push(&signal[i], samples[NB_FILTERS-1-i]); + subbands[i] = aptx_qmf_convolution(&signal[i], coeffs[i], shift); + } + + *low_subband_output = av_clip_intp2(subbands[0] + subbands[1], 23); + *high_subband_output = av_clip_intp2(subbands[0] - subbands[1], 23); +} + +/* + * Two stage QMF analysis tree. + * Split 4 input samples into 4 subbands and downsample by 4. + * So for each group of 4 samples that goes in, one sample goes out, + * split into 4 separate subbands. + */ +static void aptx_qmf_tree_analysis(QMFAnalysis *qmf, + int32_t samples[4], + int32_t subband_samples[4]) +{ + int32_t intermediate_samples[4]; + + /* Split 4 input samples into 2 intermediate subbands downsampled to 2 samples */ + for (int i = 0; i < 2; i++) + aptx_qmf_polyphase_analysis(qmf->outer_filter_signal, + aptx_qmf_outer_coeffs, 23, + &samples[2*i], + &intermediate_samples[0+i], + &intermediate_samples[2+i]); + + /* Split 2 intermediate subband samples into 4 final subbands downsampled to 1 sample */ + for (int i = 0; i < 2; i++) + aptx_qmf_polyphase_analysis(qmf->inner_filter_signal[i], + aptx_qmf_inner_coeffs, 23, + &intermediate_samples[2*i], + &subband_samples[2*i+0], + &subband_samples[2*i+1]); +} + +/* + * Half-band QMF synthesis filter realized with a polyphase FIR filter. + * Join 2 subbands and upsample by 2. + * So for each 2 subbands sample that goes in, a pair of samples goes out. + */ +static void aptx_qmf_polyphase_synthesis(FilterSignal signal[NB_FILTERS], + const int32_t coeffs[NB_FILTERS][FILTER_TAPS], + int shift, + int32_t low_subband_input, + int32_t high_subband_input, + int32_t samples[NB_FILTERS]) +{ + int32_t subbands[NB_FILTERS]; + + subbands[0] = low_subband_input + high_subband_input; + subbands[1] = low_subband_input - high_subband_input; + + for (int i = 0; i < NB_FILTERS; i++) { + aptx_qmf_filter_signal_push(&signal[i], subbands[1-i]); + samples[i] = aptx_qmf_convolution(&signal[i], coeffs[i], shift); + } +} + +/* + * Two stage QMF synthesis tree. + * Join 4 subbands and upsample by 4. + * So for each 4 subbands sample that goes in, a group of 4 samples goes out. + */ +static void aptx_qmf_tree_synthesis(QMFAnalysis *qmf, + int32_t subband_samples[4], + int32_t samples[4]) +{ + int32_t intermediate_samples[4]; + + /* Join 4 subbands into 2 intermediate subbands upsampled to 2 samples. */ + for (int i = 0; i < 2; i++) + aptx_qmf_polyphase_synthesis(qmf->inner_filter_signal[i], + aptx_qmf_inner_coeffs, 22, + subband_samples[2*i+0], + subband_samples[2*i+1], + &intermediate_samples[2*i]); + + /* Join 2 samples from intermediate subbands upsampled to 4 samples. */ + for (int i = 0; i < 2; i++) + aptx_qmf_polyphase_synthesis(qmf->outer_filter_signal, + aptx_qmf_outer_coeffs, 21, + intermediate_samples[0+i], + intermediate_samples[2+i], + &samples[2*i]); +} + + +static int32_t aptx_bin_search(int32_t value, int32_t factor, + const int32_t *intervals, int32_t nb_intervals) +{ + int32_t idx = 0; + + for (int i = nb_intervals >> 1; i > 0; i >>= 1) + if (MUL64(factor, intervals[idx + i]) <= ((int64_t)value << 24)) + idx += i; + + return idx; +} + +static void aptx_quantise_difference(Quantize *quantize, + int32_t sample_difference, + int32_t dither, + int32_t quantization_factor, + ConstTables *tables) +{ + const int32_t *intervals = tables->quantize_intervals; + int32_t quantized_sample, dithered_sample, parity_change; + int32_t d, mean, interval, inv; + int64_t error; + + quantized_sample = aptx_bin_search(FFABS(sample_difference) >> 4, + quantization_factor, + intervals, tables->tables_size); + + d = rshift32_clip24(MULH(dither, dither), 7) - (1 << 23); + d = rshift64(MUL64(d, tables->quantize_dither_factors[quantized_sample]), 23); + + intervals += quantized_sample; + mean = (intervals[1] + intervals[0]) / 2; + interval = intervals[1] - intervals[0]; + if (sample_difference < 0) + interval = -interval; + + dithered_sample = rshift64_clip24(MUL64(dither, interval) + ((int64_t)(mean + d) << 32), 32); + error = ((int64_t)FFABS(sample_difference) << 20) - MUL64(dithered_sample, quantization_factor); + quantize->error = FFABS(rshift64(error, 23)); + + parity_change = quantized_sample; + if (error < 0) quantized_sample--; + else parity_change--; + + inv = -(sample_difference < 0); + quantize->quantized_sample = quantized_sample ^ inv; + quantize->quantized_sample_parity_change = parity_change ^ inv; +} + +static void aptx_encode_channel(Channel *channel, int32_t samples[4]) +{ + int32_t subband_samples[4]; + aptx_qmf_tree_analysis(&channel->qmf, samples, subband_samples); + aptx_generate_dither(channel); + for (int subband = 0; subband < NB_SUBBANDS; subband++) { + int32_t diff = av_clip_intp2(subband_samples[subband] - channel->prediction[subband].predicted_sample, 23); + aptx_quantise_difference(&channel->quantize[subband], diff, + channel->dither[subband], + channel->invert_quantize[subband].quantization_factor, + &tables[subband]); + } +} + +static void aptx_decode_channel(Channel *channel, int32_t samples[4]) +{ + int32_t subband_samples[4]; + for (int subband = 0; subband < NB_SUBBANDS; subband++) + subband_samples[subband] = channel->prediction[subband].previous_reconstructed_sample; + aptx_qmf_tree_synthesis(&channel->qmf, subband_samples, samples); +} + + +static void aptx_invert_quantization(InvertQuantize *invert_quantize, + int32_t quantized_sample, int32_t dither, + ConstTables *tables) +{ + int32_t qr, idx, shift, factor_select; + + if (quantized_sample < 0) idx = -quantized_sample; + else idx = quantized_sample + 1; + qr = tables->quantize_intervals[idx] / 2; + if (quantized_sample < 0) + qr = -qr; + + qr = rshift64_clip24(((int64_t)qr<<32) + MUL64(dither, tables->invert_quantize_dither_factors[idx]), 32); + invert_quantize->reconstructed_difference = MUL64(invert_quantize->quantization_factor, qr) >> 19; + + shift = 24 - tables->quantized_bits; + + /* update factor_select */ + factor_select = 32620 * invert_quantize->factor_select; + factor_select = rshift32(factor_select + (tables->quantize_factor_select_offset[idx] << 15), 15); + invert_quantize->factor_select = av_clip(factor_select, 0, (shift << 8) | 0xFF); + + /* update quantization factor */ + idx = (invert_quantize->factor_select & 0xFF) >> 3; + shift -= invert_quantize->factor_select >> 8; + invert_quantize->quantization_factor = quantization_factors[idx] >> shift; +} + +static int32_t *aptx_reconstructed_differences_update(Prediction *prediction, + int32_t reconstructed_difference, + int order) +{ + int32_t *rd1 = prediction->reconstructed_differences, *rd2 = rd1 + order; + int p = prediction->pos; + + rd1[p] = rd2[p]; + prediction->pos = p = (p + 1) % order; + rd2[p] = reconstructed_difference; + return &rd2[p]; +} + +static void aptx_prediction_filtering(Prediction *prediction, + int32_t reconstructed_difference, + int order) +{ + int32_t reconstructed_sample, predictor, srd0; + int32_t *reconstructed_differences; + int64_t predicted_difference = 0; + + reconstructed_sample = av_clip_intp2(reconstructed_difference + prediction->predicted_sample, 23); + predictor = av_clip_intp2((MUL64(prediction->s_weight[0], prediction->previous_reconstructed_sample) + + MUL64(prediction->s_weight[1], reconstructed_sample)) >> 22, 23); + prediction->previous_reconstructed_sample = reconstructed_sample; + + reconstructed_differences = aptx_reconstructed_differences_update(prediction, reconstructed_difference, order); + srd0 = FFDIFFSIGN(reconstructed_difference, 0) << 23; + for (int i = 0; i < order; i++) { + int32_t srd = FF_SIGNBIT(reconstructed_differences[-i-1]) | 1; + prediction->d_weight[i] -= rshift32(prediction->d_weight[i] - srd*srd0, 8); + predicted_difference += MUL64(reconstructed_differences[-i], prediction->d_weight[i]); + } + + prediction->predicted_difference = av_clip_intp2(predicted_difference >> 22, 23); + prediction->predicted_sample = av_clip_intp2(predictor + prediction->predicted_difference, 23); +} + +static void aptx_process_subband(InvertQuantize *invert_quantize, + Prediction *prediction, + int32_t quantized_sample, int32_t dither, + ConstTables *tables) +{ + int32_t sign, same_sign[2], weight[2], sw1, range; + + aptx_invert_quantization(invert_quantize, quantized_sample, dither, tables); + + sign = FFDIFFSIGN(invert_quantize->reconstructed_difference, + -prediction->predicted_difference); + same_sign[0] = sign * prediction->prev_sign[0]; + same_sign[1] = sign * prediction->prev_sign[1]; + prediction->prev_sign[0] = prediction->prev_sign[1]; + prediction->prev_sign[1] = sign | 1; + + range = 0x100000; + sw1 = rshift32(-same_sign[1] * prediction->s_weight[1], 1); + sw1 = (av_clip(sw1, -range, range) & ~0xF) << 4; + + range = 0x300000; + weight[0] = 254 * prediction->s_weight[0] + 0x800000*same_sign[0] + sw1; + prediction->s_weight[0] = av_clip(rshift32(weight[0], 8), -range, range); + + range = 0x3C0000 - prediction->s_weight[0]; + weight[1] = 255 * prediction->s_weight[1] + 0xC00000*same_sign[1]; + prediction->s_weight[1] = av_clip(rshift32(weight[1], 8), -range, range); + + aptx_prediction_filtering(prediction, + invert_quantize->reconstructed_difference, + tables->prediction_order); +} + +static void aptx_invert_quantize_and_prediction(Channel *channel) +{ + for (int subband = 0; subband < NB_SUBBANDS; subband++) + aptx_process_subband(&channel->invert_quantize[subband], + &channel->prediction[subband], + channel->quantize[subband].quantized_sample, + channel->dither[subband], + &tables[subband]); +} + +static int32_t aptx_quantized_parity(Channel *channel) +{ + int32_t parity = channel->dither_parity; + + for (int subband = 0; subband < NB_SUBBANDS; subband++) + parity ^= channel->quantize[subband].quantized_sample; + + return parity & 1; +} + +/* For each sample, ensure that the parity of all subbands of all channels + * is 0 except once every 8 samples where the parity is forced to 1. */ +static int aptx_check_parity(Channel channels[NB_CHANNELS], int32_t *idx) +{ + int32_t parity = aptx_quantized_parity(&channels[LEFT]) + ^ aptx_quantized_parity(&channels[RIGHT]); + + int eighth = *idx == 7; + *idx = (*idx + 1) % 8; + + return parity ^ eighth; +} + +static void aptx_insert_sync(Channel channels[NB_CHANNELS], int32_t *idx) +{ + if (aptx_check_parity(channels, idx)) { + static const int map[] = { 1, 2, 0, 3 }; + Quantize *min = &channels[NB_CHANNELS-1].quantize[map[0]]; + for (Channel *c = &channels[NB_CHANNELS-1]; c >= channels; c--) + for (int i = 0; i < NB_SUBBANDS; i++) + if (c->quantize[map[i]].error < min->error) + min = &c->quantize[map[i]]; + + /* Forcing the desired parity is done by offsetting by 1 the quantized + * sample from the subband featuring the smallest quantization error. */ + min->quantized_sample = min->quantized_sample_parity_change; + } +} + +static uint16_t aptx_pack_codeword(Channel *channel) +{ + int32_t parity = aptx_quantized_parity(channel); + return (((channel->quantize[3].quantized_sample & 0x06) | parity) << 13) + | (((channel->quantize[2].quantized_sample & 0x03) ) << 11) + | (((channel->quantize[1].quantized_sample & 0x0F) ) << 7) + | (((channel->quantize[0].quantized_sample & 0x7F) ) << 0); +} + +static void aptx_unpack_codeword(Channel *channel, uint16_t codeword) +{ + channel->quantize[0].quantized_sample = sign_extend(codeword >> 0, 7); + channel->quantize[1].quantized_sample = sign_extend(codeword >> 7, 4); + channel->quantize[2].quantized_sample = sign_extend(codeword >> 11, 2); + channel->quantize[3].quantized_sample = sign_extend(codeword >> 13, 3); + channel->quantize[3].quantized_sample = (channel->quantize[3].quantized_sample & ~1) + | aptx_quantized_parity(channel); +} + +static void aptx_encode_samples(AptXContext *ctx, + int32_t samples[NB_CHANNELS][4], + uint8_t output[2*NB_CHANNELS]) +{ + for (int channel = 0; channel < NB_CHANNELS; channel++) + aptx_encode_channel(&ctx->channels[channel], samples[channel]); + + aptx_insert_sync(ctx->channels, &ctx->sync_idx); + + for (int channel = 0; channel < NB_CHANNELS; channel++) { + aptx_invert_quantize_and_prediction(&ctx->channels[channel]); + AV_WB16(output + 2*channel, aptx_pack_codeword(&ctx->channels[channel])); + } +} + +static int aptx_decode_samples(AptXContext *ctx, + const uint8_t input[2*NB_CHANNELS], + int32_t samples[NB_CHANNELS][4]) +{ + int ret; + + for (int channel = 0; channel < NB_CHANNELS; channel++) { + uint16_t codeword; + aptx_generate_dither(&ctx->channels[channel]); + + codeword = AV_RB16(input + 2*channel); + aptx_unpack_codeword(&ctx->channels[channel], codeword); + aptx_invert_quantize_and_prediction(&ctx->channels[channel]); + } + + ret = aptx_check_parity(ctx->channels, &ctx->sync_idx); + + for (int channel = 0; channel < NB_CHANNELS; channel++) + aptx_decode_channel(&ctx->channels[channel], samples[channel]); + + return ret; +} + + +static av_cold int aptx_init(AVCodecContext *avctx) +{ + AptXContext *s = avctx->priv_data; + + for (int chan = 0; chan < NB_CHANNELS; chan++) { + Channel *channel = &s->channels[chan]; + for (int subband = 0; subband < NB_SUBBANDS; subband++) { + Prediction *prediction = &channel->prediction[subband]; + prediction->prev_sign[0] = 1; + prediction->prev_sign[1] = 1; + } + } + + return 0; +} + +static int aptx_decode_frame(AVCodecContext *avctx, void *data, + int *got_frame_ptr, AVPacket *avpkt) +{ + AptXContext *s = avctx->priv_data; + AVFrame *frame = data; + const uint8_t *buf = avpkt->data; + int len = avpkt->size; + int16_t *ptr; + int ret; + + if (len < 4) { + av_log(avctx, AV_LOG_ERROR, "Packet is too small\n"); + return AVERROR_INVALIDDATA; + } + + /* get output buffer */ + frame->nb_samples = len & ~3; + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) + return ret; + ptr = (int16_t *)frame->data[0]; + + while (len >= 4) { + int32_t samples[NB_CHANNELS][4]; + + if (aptx_decode_samples(s, buf, samples)) + av_log(avctx, AV_LOG_ERROR, "Synchronization error\n"); + + for (int i = 0; i < 4; i++) { + /* convert 24 bits planar samples to 16 bits interleaved output */ + *ptr++ = samples[LEFT ][i] >> 8; + *ptr++ = samples[RIGHT][i] >> 8; + } + + buf += 4; + len -= 4; + } + + *got_frame_ptr = 1; + return avpkt->size - len; +} + +static int aptx_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, + const AVFrame *frame, int *got_packet_ptr) +{ + AptXContext *s = avctx->priv_data; + int16_t *ptr = (int16_t *)frame->data[0]; + int32_t samples[NB_CHANNELS][4]; + int ret; + + /* input must contain a multiple of 4 samples */ + if (frame->nb_samples & 3 || frame->nb_samples == 0) { + av_log(avctx, AV_LOG_ERROR, "Frame must have a multiple of 4 samples\n"); + return 0; + } + + if ((ret = ff_alloc_packet2(avctx, avpkt, frame->nb_samples, 0)) < 0) + return ret; + + for (int pos = 0; pos < frame->nb_samples; pos += 4) { + for (int i = 0; i < 4; i++) { + /* convert 16 bits interleaved input to 24 bits planar samples */ + samples[LEFT][i] = ptr[LEFT ] << 8; + samples[RIGHT][i] = ptr[RIGHT] << 8; + ptr += NB_CHANNELS; + } + + aptx_encode_samples(s, samples, avpkt->data + pos); + } + + *got_packet_ptr = 1; + return 0; +} + + +#if CONFIG_APTX_DECODER +AVCodec ff_aptx_decoder = { + .name = "aptx", + .long_name = NULL_IF_CONFIG_SMALL("aptX (Audio Processing Technology for Bluetooth)"), + .type = AVMEDIA_TYPE_AUDIO, + .id = AV_CODEC_ID_APTX, + .priv_data_size = sizeof(AptXContext), + .init = aptx_init, + .decode = aptx_decode_frame, + .capabilities = AV_CODEC_CAP_DR1, + .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_STEREO, 0}, + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_NONE }, +}; +#endif + +#if CONFIG_APTX_ENCODER +AVCodec ff_aptx_encoder = { + .name = "aptx", + .long_name = NULL_IF_CONFIG_SMALL("aptX (Audio Processing Technology for Bluetooth)"), + .type = AVMEDIA_TYPE_AUDIO, + .id = AV_CODEC_ID_APTX, + .priv_data_size = sizeof(AptXContext), + .init = aptx_init, + .encode2 = aptx_encode_frame, + .capabilities = AV_CODEC_CAP_VARIABLE_FRAME_SIZE, + .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_STEREO, 0}, + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_NONE }, +}; +#endif diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index c4134424f0..36a99f4162 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -614,6 +614,7 @@ enum AVCodecID { AV_CODEC_ID_PAF_AUDIO, AV_CODEC_ID_ON2AVC, AV_CODEC_ID_DSS_SP, + AV_CODEC_ID_APTX, AV_CODEC_ID_FFWAVESYNTH = 0x15800, AV_CODEC_ID_SONIC, diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c index 92bf1d2681..c3688de1d6 100644 --- a/libavcodec/codec_desc.c +++ b/libavcodec/codec_desc.c @@ -2859,6 +2859,13 @@ static const AVCodecDescriptor codec_descriptors[] = { .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"), .props = AV_CODEC_PROP_LOSSY, }, + { + .id = AV_CODEC_ID_APTX, + .type = AVMEDIA_TYPE_AUDIO, + .name = "aptx", + .long_name = NULL_IF_CONFIG_SMALL("aptX (Audio Processing Technology for Bluetooth)"), + .props = AV_CODEC_PROP_LOSSY, + }, /* subtitle codecs */ {

[FFmpeg-devel,1/2] aptx: implement the aptX bluetooth codec

Commit Message

Comments

Patch