From 95625b470fb61676e838310dc0108eb648f972a3 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 28 Oct 2022 21:36:10 +0200
Subject: [PATCH] aacdec: convert to lavu/tx and support fixed-point 960-sample
decoding
This patch replaces the transform used in the AAC decoder with lavu/tx
and removes the limitation that 960-sample files could only be decoded
by the float decoder; the fixed-point decoder now supports them too.
This commit also removes a number of unnecessary and slow lifting
steps that the decoder performed to compensate for the poor accuracy
of the old integer transform code.
Overall float decoder speedup on Zen 3 for 64kbps: 32%
---
configure | 4 +-
libavcodec/aac.h | 27 ++++---
libavcodec/aac_defines.h | 4 +-
libavcodec/aacdec.c | 4 +-
libavcodec/aacdec_fixed.c | 5 +-
libavcodec/aacdec_template.c | 106 ++++++++++------------------
libavcodec/aacsbr.c | 1 -
libavcodec/aacsbr_fixed.c | 1 -
libavcodec/mips/aacdec_mips.c | 4 +-
libavcodec/sinewin_fixed_tablegen.c | 3 +
libavcodec/sinewin_fixed_tablegen.h | 6 ++
11 files changed, 74 insertions(+), 91 deletions(-)
@@ -2763,8 +2763,8 @@ vc1dsp_select="h264chroma qpeldsp startcode"
rdft_select="fft"
# decoders / encoders
-aac_decoder_select="adts_header mdct15 mdct mpeg4audio sinewin"
-aac_fixed_decoder_select="adts_header mdct mpeg4audio"
+aac_decoder_select="adts_header mpeg4audio sinewin"
+aac_fixed_decoder_select="adts_header mpeg4audio"
aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin"
aac_latm_decoder_select="aac_decoder aac_latm_parser"
ac3_decoder_select="ac3_parser ac3dsp bswapdsp fmtconvert mdct"
@@ -36,11 +36,8 @@
#include "libavutil/float_dsp.h"
#include "libavutil/fixed_dsp.h"
#include "libavutil/mem_internal.h"
+#include "libavutil/tx.h"
#include "avcodec.h"
-#if !USE_FIXED
-#include "mdct15.h"
-#endif
-#include "fft.h"
#include "mpeg4audio.h"
#include "sbr.h"
@@ -326,16 +323,24 @@ struct AACContext {
* @name Computed / set up during initialization
* @{
*/
- FFTContext mdct;
- FFTContext mdct_small;
- FFTContext mdct_ld;
- FFTContext mdct_ltp;
+ AVTXContext *mdct120;
+ AVTXContext *mdct128;
+ AVTXContext *mdct480;
+ AVTXContext *mdct512;
+ AVTXContext *mdct960;
+ AVTXContext *mdct1024;
+ AVTXContext *mdct_ltp;
+
+ av_tx_fn mdct120_fn;
+ av_tx_fn mdct128_fn;
+ av_tx_fn mdct480_fn;
+ av_tx_fn mdct512_fn;
+ av_tx_fn mdct960_fn;
+ av_tx_fn mdct1024_fn;
+ av_tx_fn mdct_ltp_fn;
#if USE_FIXED
AVFixedDSPContext *fdsp;
#else
- MDCT15Context *mdct120;
- MDCT15Context *mdct480;
- MDCT15Context *mdct960;
AVFloatDSPContext *fdsp;
#endif /* USE_FIXED */
int random_state;
@@ -45,7 +45,7 @@ typedef int AAC_SIGNE;
#define Q23(a) (int)((a) * 8388608.0 + 0.5)
#define Q30(x) (int)((x)*1073741824.0 + 0.5)
#define Q31(x) (int)((x)*2147483648.0 + 0.5)
-#define RANGE15(x) x
+#define TX_SCALE(x) ((x) * 128.0f)
#define GET_GAIN(x, y) (-(y) * (1 << (x))) + 1024
#define AAC_MUL16(x, y) (int)(((int64_t)(x) * (y) + 0x8000) >> 16)
#define AAC_MUL26(x, y) (int)(((int64_t)(x) * (y) + 0x2000000) >> 26)
@@ -94,7 +94,7 @@ typedef unsigned AAC_SIGNE;
#define Q23(x) ((float)(x))
#define Q30(x) ((float)(x))
#define Q31(x) ((float)(x))
-#define RANGE15(x) (32768.0 * (x))
+#define TX_SCALE(x) ((x) / 32768.0f)
#define GET_GAIN(x, y) powf((x), -(y))
#define AAC_MUL16(x, y) ((x) * (y))
#define AAC_MUL26(x, y) ((x) * (y))
@@ -32,16 +32,14 @@
* @author Maxim Gavrilov ( maxim.gavrilov gmail com )
*/
-#define FFT_FLOAT 1
#define USE_FIXED 0
+#define TX_TYPE AV_TX_FLOAT_MDCT
#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "get_bits.h"
-#include "fft.h"
-#include "mdct15.h"
#include "lpc.h"
#include "kbdwin.h"
#include "sinewin.h"
@@ -58,15 +58,14 @@
* @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
*/
-#define FFT_FLOAT 0
#define USE_FIXED 1
+#define TX_TYPE AV_TX_INT32_MDCT
#include "libavutil/fixed_dsp.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "get_bits.h"
-#include "fft.h"
#include "lpc.h"
#include "kbdwin.h"
#include "sinewin_fixed_tablegen.h"
@@ -87,6 +86,8 @@
DECLARE_ALIGNED(32, static int, AAC_RENAME2(aac_kbd_long_1024))[1024];
DECLARE_ALIGNED(32, static int, AAC_RENAME2(aac_kbd_short_128))[128];
+DECLARE_ALIGNED(32, static int, AAC_RENAME2(aac_kbd_long_960))[960];
+DECLARE_ALIGNED(32, static int, AAC_RENAME2(aac_kbd_short_120))[120];
static av_always_inline void reset_predict_state(PredictorState *ps)
{
@@ -931,13 +931,6 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
uint8_t layout_map[MAX_ELEM_ID*4][3];
int tags = 0;
-#if USE_FIXED
- if (get_bits1(gb)) { // frameLengthFlag
- avpriv_report_missing_feature(avctx, "Fixed point 960/120 MDCT window");
- return AVERROR_PATCHWELCOME;
- }
- m4ac->frame_length_short = 0;
-#else
m4ac->frame_length_short = get_bits1(gb);
if (m4ac->frame_length_short && m4ac->sbr == 1) {
avpriv_report_missing_feature(avctx, "SBR with 960 frame length");
@@ -945,7 +938,6 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
m4ac->sbr = 0;
m4ac->ps = 0;
}
-#endif
if (get_bits1(gb)) // dependsOnCoreCoder
skip_bits(gb, 14); // coreCoderDelay
@@ -1022,14 +1014,8 @@ static int decode_eld_specific_config(AACContext *ac, AVCodecContext *avctx,
m4ac->ps = 0;
m4ac->sbr = 0;
-#if USE_FIXED
- if (get_bits1(gb)) { // frameLengthFlag
- avpriv_request_sample(avctx, "960/120 MDCT window");
- return AVERROR_PATCHWELCOME;
- }
-#else
m4ac->frame_length_short = get_bits1(gb);
-#endif
+
res_flags = get_bits(gb, 3);
if (res_flags) {
avpriv_report_missing_feature(avctx,
@@ -1256,9 +1242,10 @@ static av_cold void aac_static_table_init(void)
352);
// window initialization
-#if !USE_FIXED
AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_long_960), 4.0, 960);
AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_short_120), 6.0, 120);
+
+#if !USE_FIXED
AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_960), 960);
AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_120), 120);
AAC_RENAME(ff_init_ff_sine_windows)(9);
@@ -1276,6 +1263,7 @@ static AVOnce aac_table_init = AV_ONCE_INIT;
static av_cold int aac_decode_init(AVCodecContext *avctx)
{
+ float scale;
AACContext *ac = avctx->priv_data;
int ret;
@@ -1348,21 +1336,25 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
ac->random_state = 0x1f2e3d4c;
- AAC_RENAME_32(ff_mdct_init)(&ac->mdct, 11, 1, 1.0 / RANGE15(1024.0));
- AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ld, 10, 1, 1.0 / RANGE15(512.0));
- AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small, 8, 1, 1.0 / RANGE15(128.0));
- AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp, 11, 0, RANGE15(-2.0));
-#if !USE_FIXED
- ret = ff_mdct15_init(&ac->mdct120, 1, 3, 1.0f/(16*1024*120*2));
- if (ret < 0)
- return ret;
- ret = ff_mdct15_init(&ac->mdct480, 1, 5, 1.0f/(16*1024*960));
- if (ret < 0)
+#define MDCT_INIT(s, fn, len, sval) \
+ scale = sval; \
+ ret = av_tx_init(&s, &fn, TX_TYPE, 1, len, &scale, 0); \
+ if (ret < 0) \
return ret;
- ret = ff_mdct15_init(&ac->mdct960, 1, 6, 1.0f/(16*1024*960*2));
+
+ MDCT_INIT(ac->mdct120, ac->mdct120_fn, 120, TX_SCALE(1.0/120))
+ MDCT_INIT(ac->mdct128, ac->mdct128_fn, 128, TX_SCALE(1.0/128))
+ MDCT_INIT(ac->mdct480, ac->mdct480_fn, 480, TX_SCALE(1.0/480))
+ MDCT_INIT(ac->mdct512, ac->mdct512_fn, 512, TX_SCALE(1.0/512))
+ MDCT_INIT(ac->mdct960, ac->mdct960_fn, 960, TX_SCALE(1.0/960))
+ MDCT_INIT(ac->mdct1024, ac->mdct1024_fn, 1024, TX_SCALE(1.0/1024))
+#undef MDCT_INIT
+
+ /* LTP forward MDCT */
+ scale = USE_FIXED ? -1.0 : -32786.0*2 + 36;
+ ret = av_tx_init(&ac->mdct_ltp, &ac->mdct_ltp_fn, TX_TYPE, 0, 1024, &scale, 0);
if (ret < 0)
return ret;
-#endif
return 0;
}
@@ -2691,7 +2683,7 @@ static void windowing_and_mdct_ltp(AACContext *ac, INTFLOAT *out,
ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
memset(in + 1024 + 576, 0, 448 * sizeof(*in));
}
- ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
+ ac->mdct_ltp_fn(ac->mdct_ltp, out, in, sizeof(INTFLOAT));
}
/**
@@ -2783,13 +2775,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
// imdct
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
for (i = 0; i < 1024; i += 128)
- ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
+ ac->mdct128_fn(ac->mdct128, buf + i, in + i, sizeof(INTFLOAT));
} else {
- ac->mdct.imdct_half(&ac->mdct, buf, in);
-#if USE_FIXED
- for (i=0; i<1024; i++)
- buf[i] = (buf[i] + 4LL) >> 3;
-#endif /* USE_FIXED */
+ ac->mdct1024_fn(ac->mdct1024, buf, in, sizeof(INTFLOAT));
}
/* window overlapping
@@ -2837,7 +2825,6 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
*/
static void imdct_and_windowing_960(AACContext *ac, SingleChannelElement *sce)
{
-#if !USE_FIXED
IndividualChannelStream *ics = &sce->ics;
INTFLOAT *in = sce->coeffs;
INTFLOAT *out = sce->ret;
@@ -2852,9 +2839,9 @@ static void imdct_and_windowing_960(AACContext *ac, SingleChannelElement *sce)
// imdct
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
for (i = 0; i < 8; i++)
- ac->mdct120->imdct_half(ac->mdct120, buf + i * 120, in + i * 128, 1);
+ ac->mdct120_fn(ac->mdct120, buf + i * 120, in + i * 128, sizeof(INTFLOAT));
} else {
- ac->mdct960->imdct_half(ac->mdct960, buf, in, 1);
+ ac->mdct960_fn(ac->mdct960, buf, in, sizeof(INTFLOAT));
}
/* window overlapping
@@ -2896,7 +2883,6 @@ static void imdct_and_windowing_960(AACContext *ac, SingleChannelElement *sce)
} else { // LONG_STOP or ONLY_LONG
memcpy( saved, buf + 480, 480 * sizeof(*saved));
}
-#endif
}
static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
{
@@ -2905,17 +2891,9 @@ static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
INTFLOAT *out = sce->ret;
INTFLOAT *saved = sce->saved;
INTFLOAT *buf = ac->buf_mdct;
-#if USE_FIXED
- int i;
-#endif /* USE_FIXED */
// imdct
- ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
-
-#if USE_FIXED
- for (i = 0; i < 1024; i++)
- buf[i] = (buf[i] + 2) >> 2;
-#endif /* USE_FIXED */
+ ac->mdct512_fn(ac->mdct512, buf, in, sizeof(INTFLOAT));
// window overlapping
if (ics->use_kb_window[1]) {
@@ -2954,20 +2932,15 @@ static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
temp = in[i ]; in[i ] = -in[n - 1 - i]; in[n - 1 - i] = temp;
temp = -in[i + 1]; in[i + 1] = in[n - 2 - i]; in[n - 2 - i] = temp;
}
-#if !USE_FIXED
+
if (n == 480)
- ac->mdct480->imdct_half(ac->mdct480, buf, in, 1);
+ ac->mdct480_fn(ac->mdct480, buf, in, sizeof(INTFLOAT));
else
-#endif
- ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
-
-#if USE_FIXED
- for (i = 0; i < 1024; i++)
- buf[i] = (buf[i] + 1) >> 1;
-#endif /* USE_FIXED */
+ ac->mdct512_fn(ac->mdct512, buf, in, sizeof(INTFLOAT));
for (i = 0; i < n; i+=2) {
- buf[i] = -buf[i];
+ buf[i + 0] = -(USE_FIXED + 1)*buf[i + 0];
+ buf[i + 1] = (USE_FIXED + 1)*buf[i + 1];
}
// Like with the regular IMDCT at this point we still have the middle half
// of a transform but with even symmetry on the left and odd symmetry on
@@ -3529,15 +3502,14 @@ static av_cold int aac_decode_close(AVCodecContext *avctx)
}
}
- ff_mdct_end(&ac->mdct);
- ff_mdct_end(&ac->mdct_small);
- ff_mdct_end(&ac->mdct_ld);
- ff_mdct_end(&ac->mdct_ltp);
-#if !USE_FIXED
- ff_mdct15_uninit(&ac->mdct120);
- ff_mdct15_uninit(&ac->mdct480);
- ff_mdct15_uninit(&ac->mdct960);
-#endif
+ av_tx_uninit(&ac->mdct120);
+ av_tx_uninit(&ac->mdct128);
+ av_tx_uninit(&ac->mdct480);
+ av_tx_uninit(&ac->mdct512);
+ av_tx_uninit(&ac->mdct960);
+ av_tx_uninit(&ac->mdct1024);
+ av_tx_uninit(&ac->mdct_ltp);
+
av_freep(&ac->fdsp);
return 0;
}
@@ -31,7 +31,6 @@
#include "sbr.h"
#include "aacsbr.h"
#include "aacsbrdata.h"
-#include "fft.h"
#include "internal.h"
#include "aacps.h"
#include "sbrdsp.h"
@@ -60,7 +60,6 @@
#include "sbr.h"
#include "aacsbr.h"
#include "aacsbrdata.h"
-#include "fft.h"
#include "aacps.h"
#include "sbrdsp.h"
#include "libavutil/internal.h"
@@ -126,9 +126,9 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce)
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
for (i = 0; i < 1024; i += 128)
- ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
+ ac->mdct128_fn(ac->mdct128, buf + i, in + i, sizeof(float));
} else
- ac->mdct.imdct_half(&ac->mdct, buf, in);
+ ac->mdct1024_fn(ac->mdct1024, buf, in, sizeof(float));
/* window overlapping
* NOTE: To simplify the overlapping code, all 'meaningless' short to long
@@ -35,8 +35,11 @@ int main(void)
printf("SINETABLE("#size") = {\n"); \
write_int32_t_array(sine_ ## size ## _fixed, size); \
printf("};\n")
+ PRINT_TABLE(120);
PRINT_TABLE(128);
+ PRINT_TABLE(480);
PRINT_TABLE(512);
+ PRINT_TABLE(960);
PRINT_TABLE(1024);
return 0;
}
@@ -44,8 +44,11 @@
#include "libavutil/attributes.h"
#define SINETABLE_CONST
+SINETABLE( 120);
SINETABLE( 128);
+SINETABLE( 480);
SINETABLE( 512);
+SINETABLE( 960);
SINETABLE(1024);
#define SIN_FIX(a) (int)floor((a) * 0x80000000 + 0.5)
@@ -59,8 +62,11 @@ static av_cold void sine_window_init_fixed(int *window, int n)
static av_cold void init_sine_windows_fixed(void)
{
+ sine_window_init_fixed(sine_120_fixed, 120);
sine_window_init_fixed(sine_128_fixed, 128);
+ sine_window_init_fixed(sine_480_fixed, 480);
sine_window_init_fixed(sine_512_fixed, 512);
+ sine_window_init_fixed(sine_960_fixed, 960);
sine_window_init_fixed(sine_1024_fixed, 1024);
}
#endif /* CONFIG_HARDCODED_TABLES */
--
2.37.2.609.g9ff673ca1a