From patchwork Tue May 14 17:17:51 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Lynne <dev@lynne.ee>
X-Patchwork-Id: 13112
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id F0228447E18
	for <patchwork@ffaux-bg.ffmpeg.org>;
	Tue, 14 May 2019 20:17:58 +0300 (EEST)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id D61276882FF;
	Tue, 14 May 2019 20:17:58 +0300 (EEST)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from w4.tutanota.de (w4.tutanota.de [81.3.6.165])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 23C546809C5
	for <ffmpeg-devel@ffmpeg.org>; Tue, 14 May 2019 20:17:52 +0300 (EEST)
Received: from w2.tutanota.de (unknown [192.168.1.163])
	by w4.tutanota.de (Postfix) with ESMTP id 940711060200
	for <ffmpeg-devel@ffmpeg.org>; Tue, 14 May 2019 17:17:51 +0000 (UTC)
Date: Tue, 14 May 2019 19:17:51 +0200 (CEST)
From: Lynne <dev@lynne.ee>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Message-ID: <LerLQMQ--3-1@lynne.ee>
In-Reply-To: 
 <CAPYw7P74tv0dqUQ4gyVhiaAAobNSNERZx_TWUwZZx3o6dUBwXw@mail.gmail.com>
References: <LeXIvak--3-1@lynne.ee>
	<CAPYw7P74tv0dqUQ4gyVhiaAAobNSNERZx_TWUwZZx3o6dUBwXw@mail.gmail.com>
MIME-Version: 1.0
Subject: Re: [FFmpeg-devel] [PATCH v2] libavutil: add an FFT &
	MDCT	implementation
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <http://ffmpeg.org/mailman/options/ffmpeg-devel>,
	<mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <http://ffmpeg.org/pipermail/ffmpeg-devel/>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <http://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
	<mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches
	<ffmpeg-devel@ffmpeg.org>
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

May 14, 2019, 5:55 PM by onemda@gmail.com:

> On 5/10/19, Lynne <dev@lynne.ee> wrote:
>
>> Patch updated again.
>> Made some more cleanups to the transforms, the tables and the main context.
>> API changed again, now the init function populates the function pointer for
>> transform.
>> I decided that having a separate function would encourage bad usage (e.g.
>> calling
>> the function every time before doing a transform rather than storing the
>> pointer) when
>> we're trying to avoid the overhead of function calls.
>> Also adjusted file names to match the API.
>>
>
> LGTM, going to apply soon.
>

I've attached the latest version. Not much has changed: just some cleanup, twiddle
adjustments to prepare for SIMD, and making the scale argument const.
I've removed AV_TX_NB since it wasn't used; if needed, it can be added later without
breaking the API.
Added #include <stddef.h> to tx.h, as that is where ptrdiff_t is defined.

One thing to know when using it as an MDCT: unlike ff_mdct_init, where the window
size is required (e.g. for a 1024-sample MDCT you'd put in 2048, since that's your
window size), here the frame size is used, so you'd put in a length of 1024 for
a 1024-sample MDCT (which has a window size of 2048 samples).
I'm not sure whether this makes more sense in general, but we can't change the
behavior after it's pushed, as that would break the API.

I've attached a diff which replaces the MDCT in aacenc, vorbisdec/enc, atrac9dec and
opusdec/enc with this one for testing. Passes FATE.

diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 4d0abb107f..a449c0e175 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -201,15 +201,15 @@ static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
                                   float *audio)
 {
     int i;
-    const float *output = sce->ret_buf;
+    float *output = sce->ret_buf;
 
     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
 
     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
-        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
+        s->mdct1024_fn(s->mdct1024, sce->coeffs, output, sizeof(float));
     else
         for (i = 0; i < 1024; i += 128)
-            s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
+            s->mdct128_fn(s->mdct128, &sce->coeffs[i], output + i*2, sizeof(float));
     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 }
@@ -665,7 +665,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             if (s->options.ltp && s->coder->update_ltp) {
                 s->coder->update_ltp(s, sce);
                 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
-                s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
+                s->mdct1024_fn(s->mdct1024, sce->lcoeffs, sce->ret_buf, sizeof(float));
             }
 
             for (k = 0; k < 1024; k++) {
@@ -902,8 +902,8 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
 
     av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
 
-    ff_mdct_end(&s->mdct1024);
-    ff_mdct_end(&s->mdct128);
+    av_tx_uninit(&s->mdct1024);
+    av_tx_uninit(&s->mdct128);
     ff_psy_end(&s->psy);
     ff_lpc_end(&s->lpc);
     if (s->psypp)
@@ -918,6 +918,7 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
 {
     int ret = 0;
+    const float scale = 32768.0;
 
     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
@@ -929,9 +930,9 @@ static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
     ff_init_ff_sine_windows(10);
     ff_init_ff_sine_windows(7);
 
-    if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
+    if ((ret = av_tx_init(&s->mdct1024, &s->mdct1024_fn, AV_TX_FLOAT_MDCT, 0, 1024, &scale, 0)) < 0)
         return ret;
-    if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
+    if ((ret = av_tx_init(&s->mdct128, &s->mdct128_fn,   AV_TX_FLOAT_MDCT, 0,  128, &scale, 0)) < 0)
         return ret;
 
     return 0;
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 5a015ca92e..f8d157a3ca 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -23,6 +23,7 @@
 #define AVCODEC_AACENC_H
 
 #include "libavutil/float_dsp.h"
+#include "libavutil/tx.h"
 #include "avcodec.h"
 #include "put_bits.h"
 
@@ -377,8 +378,10 @@ typedef struct AACEncContext {
     AVClass *av_class;
     AACEncOptions options;                       ///< encoding options
     PutBitContext pb;
-    FFTContext mdct1024;                         ///< long (1024 samples) frame transform context
-    FFTContext mdct128;                          ///< short (128 samples) frame transform context
+    AVTXContext *mdct1024;                       ///< long (1024 samples) frame transform context
+    av_tx_fn mdct1024_fn;
+    AVTXContext *mdct128;                        ///< short (128 samples) frame transform context
+    av_tx_fn mdct128_fn;
     AVFloatDSPContext *fdsp;
     AACPCEInfo pce;                              ///< PCE data, if needed
     float *planar_samples[16];                   ///< saved preprocessed input
diff --git a/libavcodec/atrac9dec.c b/libavcodec/atrac9dec.c
index 805d46f3b8..423debd49c 100644
--- a/libavcodec/atrac9dec.c
+++ b/libavcodec/atrac9dec.c
@@ -21,8 +21,8 @@
 
 #include "internal.h"
 #include "get_bits.h"
-#include "fft.h"
 #include "atrac9tab.h"
+#include "libavutil/tx.h"
 #include "libavutil/lfg.h"
 #include "libavutil/float_dsp.h"
 
@@ -76,7 +76,8 @@ typedef struct ATRAC9BlockData {
 typedef struct ATRAC9Context {
     AVCodecContext *avctx;
     AVFloatDSPContext *fdsp;
-    FFTContext imdct;
+    AVTXContext *tx;
+    av_tx_fn txfn;
     ATRAC9BlockData block[5];
     AVLFG lfg;
 
@@ -751,7 +752,7 @@ imdct:
         const ptrdiff_t offset = wsize*frame_idx*sizeof(float);
         float *dst = (float *)(frame->extended_data[dst_idx] + offset);
 
-        s->imdct.imdct_half(&s->imdct, s->temp, c->coeffs);
+        s->txfn(s->tx, s->temp, c->coeffs, sizeof(float));
         s->fdsp->vector_fmul_window(dst, c->prev_win, s->temp,
                                     s->imdct_win, wsize >> 1);
         memcpy(c->prev_win, s->temp + (wsize >> 1), sizeof(float)*wsize >> 1);
@@ -817,7 +818,7 @@ static av_cold int atrac9_decode_close(AVCodecContext *avctx)
             for (int k = 0; k < 4; k++)
                 ff_free_vlc(&s->coeff_vlc[i][j][k]);
 
-    ff_mdct_end(&s->imdct);
+    av_tx_uninit(&s->tx);
     av_free(s->fdsp);
 
     return 0;
@@ -825,8 +826,10 @@ static av_cold int atrac9_decode_close(AVCodecContext *avctx)
 
 static av_cold int atrac9_decode_init(AVCodecContext *avctx)
 {
+    int ret;
     GetBitContext gb;
     ATRAC9Context *s = avctx->priv_data;
+    const float mdct_scale = 1.0 / 32768;
     int version, block_config_idx, superframe_idx, alloc_c_len;
 
     s->avctx = avctx;
@@ -881,8 +884,9 @@ static av_cold int atrac9_decode_init(AVCodecContext *avctx)
     s->frame_count = 1 << superframe_idx;
     s->frame_log2  = at9_tab_sri_frame_log2[s->samplerate_idx];
 
-    if (ff_mdct_init(&s->imdct, s->frame_log2 + 1, 1, 1.0f / 32768.0f))
-        return AVERROR(ENOMEM);
+    if ((ret = av_tx_init(&s->tx, &s->txfn, AV_TX_FLOAT_MDCT, 1,
+                          1 << s->frame_log2, &mdct_scale, 0)))
+        return ret;
 
     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c
index 4655172b09..887e189f6d 100644
--- a/libavcodec/opus_celt.c
+++ b/libavcodec/opus_celt.c
@@ -323,7 +323,8 @@ int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc,
 {
     int i, j, downmix = 0;
     int consumed;           // bits of entropy consumed thus far for this frame
-    MDCT15Context *imdct;
+    AVTXContext *tx;
+    av_tx_fn txfn;
 
     if (channels != 1 && channels != 2) {
         av_log(f->avctx, AV_LOG_ERROR, "Invalid number of coded channels: %d\n",
@@ -385,7 +386,8 @@ int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc,
     f->blocks    = f->transient ? 1 << f->size : 1;
     f->blocksize = frame_size / f->blocks;
 
-    imdct = f->imdct[f->transient ? 0 : f->size];
+    tx = f->tx[f->transient ? 0 : f->size];
+    txfn = f->txfn[f->transient ? 0 : f->size];
 
     if (channels == 1) {
         for (i = 0; i < CELT_MAX_BANDS; i++)
@@ -440,8 +442,8 @@ int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc,
         for (j = 0; j < f->blocks; j++) {
             float *dst  = block->buf + 1024 + j * f->blocksize;
 
-            imdct->imdct_half(imdct, dst + CELT_OVERLAP / 2, f->block[i].coeffs + j,
-                              f->blocks);
+            txfn(tx, dst + CELT_OVERLAP / 2, f->block[i].coeffs + j,
+                 f->blocks * sizeof(float));
             f->dsp->vector_fmul_window(dst, dst, dst + CELT_OVERLAP / 2,
                                        ff_celt_window, CELT_OVERLAP / 2);
         }
@@ -522,8 +524,8 @@ void ff_celt_free(CeltFrame **f)
     if (!frm)
         return;
 
-    for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++)
-        ff_mdct15_uninit(&frm->imdct[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(frm->tx); i++)
+        av_tx_uninit(&frm->tx[i]);
 
     ff_celt_pvq_uninit(&frm->pvq);
 
@@ -551,9 +553,12 @@ int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels,
     frm->output_channels = output_channels;
     frm->apply_phase_inv = apply_phase_inv;
 
-    for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++)
-        if ((ret = ff_mdct15_init(&frm->imdct[i], 1, i + 3, -1.0f/32768)) < 0)
+    for (i = 0; i < FF_ARRAY_ELEMS(frm->tx); i++) {
+        const float scale = -1.0/32768;
+        if ((ret = av_tx_init(&frm->tx[i], &frm->txfn[i], AV_TX_FLOAT_MDCT,
+                              1, 120 << i, &scale, 0)))
             goto fail;
+    }
 
     if ((ret = ff_celt_pvq_init(&frm->pvq, 0)) < 0)
         goto fail;
diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h
index 7c1c5316b9..fe2414cb64 100644
--- a/libavcodec/opus_celt.h
+++ b/libavcodec/opus_celt.h
@@ -30,9 +30,9 @@
 #include "opus_pvq.h"
 #include "opusdsp.h"
 
-#include "mdct15.h"
 #include "libavutil/float_dsp.h"
 #include "libavutil/libm.h"
+#include "libavutil/tx.h"
 
 #define CELT_VECTORS                 11
 #define CELT_ALLOC_STEPS             6
@@ -92,7 +92,8 @@ typedef struct CeltBlock {
 struct CeltFrame {
     // constant values that do not change during context lifetime
     AVCodecContext      *avctx;
-    MDCT15Context       *imdct[4];
+    AVTXContext         *tx[4];
+    av_tx_fn             txfn[4];
     AVFloatDSPContext   *dsp;
     CeltBlock           block[2];
     CeltPVQ             *pvq;
diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
index 3c08ebcf69..4ca8cb336b 100644
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c
@@ -37,7 +37,8 @@ typedef struct OpusEncContext {
     AVCodecContext *avctx;
     AudioFrameQueue afq;
     AVFloatDSPContext *dsp;
-    MDCT15Context *mdct[CELT_BLOCK_NB];
+    AVTXContext *tx[CELT_BLOCK_NB];
+    av_tx_fn   txfn[CELT_BLOCK_NB];
     CeltPVQ *pvq;
     struct FFBufQueue bufqueue;
 
@@ -201,7 +202,7 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
                 s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
                                             ff_celt_window - 8, 128);
                 src1 = src2;
-                s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
+                s->txfn[0](s->tx[0], b->coeffs + t, win, f->blocks * sizeof(float));
             }
         }
     } else {
@@ -223,7 +224,7 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
                                         ff_celt_window - 8, 128);
             memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float));
 
-            s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
+            s->txfn[f->size](s->tx[f->size], b->coeffs, win, sizeof(float));
         }
     }
 
@@ -604,7 +605,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx)
     OpusEncContext *s = avctx->priv_data;
 
     for (int i = 0; i < CELT_BLOCK_NB; i++)
-        ff_mdct15_uninit(&s->mdct[i]);
+        av_tx_uninit(&s->tx[i]);
 
     ff_celt_pvq_uninit(&s->pvq);
     av_freep(&s->dsp);
@@ -660,10 +661,12 @@ static av_cold int opus_encode_init(AVCodecContext *avctx)
     if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
         return AVERROR(ENOMEM);
 
-    /* I have no idea why a base scaling factor of 68 works, could be the twiddles */
-    for (int i = 0; i < CELT_BLOCK_NB; i++)
-        if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
-            return AVERROR(ENOMEM);
+    for (int i = 0; i < CELT_BLOCK_NB; i++) {
+        const float scale = 68 << (CELT_BLOCK_NB - 1 - i);
+        if ((ret = av_tx_init(&s->tx[i], &s->txfn[i], AV_TX_FLOAT_MDCT,
+                              0, 120 << i, &scale, 0)))
+            return ret;
+    }
 
     /* Zero out previous energy (matters for inter first frame) */
     for (int ch = 0; ch < s->channels; ch++)
diff --git a/libavcodec/opusenc_psy.h b/libavcodec/opusenc_psy.h
index b91e4f1b8b..e79d3a108e 100644
--- a/libavcodec/opusenc_psy.h
+++ b/libavcodec/opusenc_psy.h
@@ -22,6 +22,7 @@
 #ifndef AVCODEC_OPUSENC_PSY_H
 #define AVCODEC_OPUSENC_PSY_H
 
+#include "mdct15.h"
 #include "opusenc.h"
 #include "opusenc_utils.h"
 #include "libavfilter/window_func.h"
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 00e9cd8a13..0dfc51a1a3 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -29,6 +29,7 @@
 #include <inttypes.h>
 #include <math.h>
 
+#include "libavutil/tx.h"
 #include "libavutil/avassert.h"
 #include "libavutil/float_dsp.h"
 
@@ -128,7 +129,8 @@ typedef struct vorbis_context_s {
     VorbisDSPContext dsp;
     AVFloatDSPContext *fdsp;
 
-    FFTContext mdct[2];
+    AVTXContext *mdct[2];
+    av_tx_fn mdct_fn[2];
     uint8_t       first_frame;
     uint32_t      version;
     uint8_t       audio_channels;
@@ -199,8 +201,8 @@ static void vorbis_free(vorbis_context *vc)
     av_freep(&vc->residues);
     av_freep(&vc->modes);
 
-    ff_mdct_end(&vc->mdct[0]);
-    ff_mdct_end(&vc->mdct[1]);
+    av_tx_uninit(&vc->mdct[0]);
+    av_tx_uninit(&vc->mdct[1]);
 
     if (vc->codebooks)
         for (i = 0; i < vc->codebook_count; ++i) {
@@ -959,6 +961,7 @@ static int vorbis_parse_setup_hdr(vorbis_context *vc)
 
 static int vorbis_parse_id_hdr(vorbis_context *vc)
 {
+    const float mdct_scale = -1.0;
     GetBitContext *gb = &vc->gb;
     unsigned bl0, bl1;
 
@@ -1006,8 +1009,8 @@ static int vorbis_parse_id_hdr(vorbis_context *vc)
 
     vc->previous_window  = -1;
 
-    ff_mdct_init(&vc->mdct[0], bl0, 1, -1.0);
-    ff_mdct_init(&vc->mdct[1], bl1, 1, -1.0);
+    av_tx_init(&vc->mdct[0], &vc->mdct_fn[0], AV_TX_FLOAT_MDCT, 1, 1 << (bl0 - 1), &mdct_scale, 0);
+    av_tx_init(&vc->mdct[1], &vc->mdct_fn[1], AV_TX_FLOAT_MDCT, 1, 1 << (bl1 - 1), &mdct_scale, 0);
     vc->fdsp = avpriv_float_dsp_alloc(vc->avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!vc->fdsp)
         return AVERROR(ENOMEM);
@@ -1575,7 +1578,8 @@ void ff_vorbis_inverse_coupling(float *mag, float *ang, intptr_t blocksize)
 static int vorbis_parse_audio_packet(vorbis_context *vc, float **floor_ptr)
 {
     GetBitContext *gb = &vc->gb;
-    FFTContext *mdct;
+    AVTXContext *mdct;
+    av_tx_fn mdct_fn;
     int previous_window = vc->previous_window;
     unsigned mode_number, blockflag, blocksize;
     int i, j;
@@ -1697,12 +1701,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc, float **floor_ptr)
 
 // Dotproduct, MDCT
 
-    mdct = &vc->mdct[blockflag];
+    mdct = vc->mdct[blockflag];
+    mdct_fn = vc->mdct_fn[blockflag];
 
     for (j = vc->audio_channels-1;j >= 0; j--) {
         ch_res_ptr   = vc->channel_residues + res_chan[j] * blocksize / 2;
         vc->fdsp->vector_fmul(floor_ptr[j], floor_ptr[j], ch_res_ptr, blocksize / 2);
-        mdct->imdct_half(mdct, ch_res_ptr, floor_ptr[j]);
+        mdct_fn(mdct, ch_res_ptr, floor_ptr[j], sizeof(float));
     }
 
 // Overlap/add, save data for next overlapping
diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
index 18a679f2dc..f1c665713f 100644
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c
@@ -25,6 +25,7 @@
  */
 
 #include <float.h>
+#include "libavutil/tx.h"
 #include "libavutil/float_dsp.h"
 
 #include "avcodec.h"
@@ -105,7 +106,8 @@ typedef struct vorbis_enc_context {
     int channels;
     int sample_rate;
     int log2_blocksize[2];
-    FFTContext mdct[2];
+    AVTXContext *mdct[2];
+    av_tx_fn mdct_fn[2];
     const float *win[2];
     int have_saved;
     float *saved;
@@ -249,6 +251,7 @@ static int ready_residue(vorbis_enc_residue *rc, vorbis_enc_context *venc)
 static av_cold int dsp_init(AVCodecContext *avctx, vorbis_enc_context *venc)
 {
     int ret = 0;
+    const float scale = 1.0;
 
     venc->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!venc->fdsp)
@@ -258,9 +261,11 @@ static av_cold int dsp_init(AVCodecContext *avctx, vorbis_enc_context *venc)
     venc->win[0] = ff_vorbis_vwin[venc->log2_blocksize[0] - 6];
     venc->win[1] = ff_vorbis_vwin[venc->log2_blocksize[1] - 6];
 
-    if ((ret = ff_mdct_init(&venc->mdct[0], venc->log2_blocksize[0], 0, 1.0)) < 0)
+    if ((ret = av_tx_init(&venc->mdct[0], &venc->mdct_fn[0], AV_TX_FLOAT_MDCT, 0,
+                          1 << (venc->log2_blocksize[0] - 1), &scale, 0)) < 0)
         return ret;
-    if ((ret = ff_mdct_init(&venc->mdct[1], venc->log2_blocksize[1], 0, 1.0)) < 0)
+    if ((ret = av_tx_init(&venc->mdct[1], &venc->mdct_fn[1], AV_TX_FLOAT_MDCT, 0,
+                          1 << (venc->log2_blocksize[1] - 1), &scale, 0)) < 0)
         return ret;
 
     return 0;
@@ -1016,8 +1021,8 @@ static int apply_window_and_mdct(vorbis_enc_context *venc)
         fdsp->vector_fmul_reverse(offset, offset, win, window_len);
         fdsp->vector_fmul_scalar(offset, offset, 1/n, window_len);
 
-        venc->mdct[1].mdct_calc(&venc->mdct[1], venc->coeffs + channel * window_len,
-                     venc->samples + channel * window_len * 2);
+        venc->mdct_fn[1](venc->mdct[1], venc->coeffs + channel * window_len,
+                         venc->samples + channel * window_len * 2, sizeof(float));
     }
     return 1;
 }
@@ -1254,8 +1259,8 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx)
     av_freep(&venc->scratch);
     av_freep(&venc->fdsp);
 
-    ff_mdct_end(&venc->mdct[0]);
-    ff_mdct_end(&venc->mdct[1]);
+    av_tx_uninit(&venc->mdct[0]);
+    av_tx_uninit(&venc->mdct[1]);
     ff_af_queue_close(&venc->afq);
     ff_bufqueue_discard_all(&venc->bufqueue);