diff mbox

[FFmpeg-devel,v2,2/5] imdct15: rename to mdct15 and add a forward transform

Message ID CAE9qxYBQNBzh6VNcPcB8vkRy5scPsuKKN9_hc-CqOkDg5-E2TQ@mail.gmail.com
State Accepted
Headers show

Commit Message

Rostislav Pehlivanov Feb. 11, 2017, 1:30 a.m. UTC
On 11 February 2017 at 01:26, Rostislav Pehlivanov <atomnuker@gmail.com>
wrote:

>
>
> On 11 February 2017 at 01:22, James Almer <jamrial@gmail.com> wrote:
>
>> On 2/10/2017 9:25 PM, Rostislav Pehlivanov wrote:
>> > Handles strides (needed for Opus transients), does pre-reindexing and folding
>> > without needing a copy.
>> >
>> > Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
>> > ---
>> >  configure                    |   6 +-
>> >  libavcodec/Makefile          |   2 +-
>> >  libavcodec/aac.h             |   4 +-
>> >  libavcodec/aacdec.c          |   2 +-
>> >  libavcodec/aacdec_template.c |   4 +-
>> >  libavcodec/mdct15.c          | 335 +++++++++++++++++++++++++++++++++++++++++++
>> >  libavcodec/mdct15.h          |  70 +++++++++
>> >  libavcodec/opus_celt.c       |  10 +-
>> >  8 files changed, 419 insertions(+), 14 deletions(-)
>> >  create mode 100644 libavcodec/mdct15.c
>> >  create mode 100644 libavcodec/mdct15.h
>>
>> Forgot to do "git rm libavcodec/imdct15.*"?
>>
>> > diff --git a/libavcodec/mdct15.h b/libavcodec/mdct15.h
>> > new file mode 100644
>> > index 0000000000..2a503e5996
>> > --- /dev/null
>> > +++ b/libavcodec/mdct15.h
>> > @@ -0,0 +1,70 @@
>> > +/*
>> > + * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
>> > + *
>> > + * This file is part of FFmpeg.
>> > + *
>> > + * FFmpeg is free software; you can redistribute it and/or
>> > + * modify it under the terms of the GNU Lesser General Public
>> > + * License as published by the Free Software Foundation; either
>> > + * version 2.1 of the License, or (at your option) any later version.
>> > + *
>> > + * FFmpeg is distributed in the hope that it will be useful,
>> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > + * Lesser General Public License for more details.
>> > + *
>> > + * You should have received a copy of the GNU Lesser General Public
>> > + * License along with FFmpeg; if not, write to the Free Software
>> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> > + */
>> > +
>> > +#ifndef AVCODEC_MDCT15_H
>> > +#define AVCODEC_MDCT15_H
>> > +
>> > +#include <stddef.h>
>> > +
>> > +#include "fft.h"
>> > +
>> > +typedef struct MDCT15Context {
>> > +    int fft_n;
>> > +    int len2;
>> > +    int len4;
>> > +    int inverse;
>> > +    int *pfa_prereindex;
>> > +    int *pfa_postreindex;
>> > +
>> > +    FFTContext ptwo_fft;
>> > +
>> > +    FFTComplex *tmp;
>> > +
>> > +    FFTComplex *twiddle_exptab;
>> > +
>> > +    /* 0 - 18: fft15 twiddles, 19 - 20: fft5 twiddles */
>> > +    FFTComplex exptab[21];
>> > +
>> > +    /**
>> > +     * Calculate a full 2N -> N MDCT
>> > +     */
>> > +    void (*mdct)(struct MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride);
>> > +
>> > +    /**
>> > +     * Calculate the middle half of the iMDCT
>> > +     */
>> > +    void (*imdct_half)(struct MDCT15Context *s, float *dst, const float *src,
>> > +                       ptrdiff_t src_stride, float scale);
>> > +} MDCT15Context;
>> > +
>> > +/**
>> > + * Init an (i)MDCT of the length 2 * 15 * (2^N)
>> > + */
>> > +int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale);
>> > +
>> > +/**
>> > + * Frees a context
>> > + */
>> > +void ff_mdct15_uninit(MDCT15Context **ps);
>> > +
>> > +
>> > +void ff_mdct15_init_aarch64(MDCT15Context *s);
>>
>> This seems like a remnant of old code.
>>
>>
> Yep, it is, changed locally to delete ff_mdct15_init_aarch64().
>

Never mind, I saw what you meant.
Added that change and removed imdct15.c/.h.

I've attached the new patch.
diff mbox

Patch

From 42fcb6c47ca2eaf50405a865ee2639fa5ebdc4c9 Mon Sep 17 00:00:00 2001
From: Rostislav Pehlivanov <atomnuker@gmail.com>
Date: Wed, 1 Feb 2017 03:13:06 +0000
Subject: [PATCH] imdct15: rename to mdct15 and add a forward transform

Handles strides (needed for Opus transients), does pre-reindexing and folding
without needing a copy.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
---
 configure                          |  6 +--
 libavcodec/Makefile                |  2 +-
 libavcodec/aac.h                   |  4 +-
 libavcodec/aacdec.c                |  2 +-
 libavcodec/aacdec_template.c       |  4 +-
 libavcodec/{imdct15.c => mdct15.c} | 90 +++++++++++++++++++++++++++++++-------
 libavcodec/{imdct15.h => mdct15.h} | 28 +++++++-----
 libavcodec/opus_celt.c             | 10 ++---
 8 files changed, 105 insertions(+), 41 deletions(-)
 rename libavcodec/{imdct15.c => mdct15.c} (73%)
 rename libavcodec/{imdct15.h => mdct15.h} (66%)

diff --git a/configure b/configure
index 72b86bc407..a7cd3a2244 100755
--- a/configure
+++ b/configure
@@ -2107,7 +2107,7 @@  CONFIG_EXTRA="
     huffyuvencdsp
     idctdsp
     iirfilter
-    imdct15
+    mdct15
     intrax8
     iso_media
     ividsp
@@ -2349,7 +2349,7 @@  vc1dsp_select="h264chroma qpeldsp startcode"
 rdft_select="fft"
 
 # decoders / encoders
-aac_decoder_select="imdct15 mdct sinewin"
+aac_decoder_select="mdct15 mdct sinewin"
 aac_fixed_decoder_select="mdct sinewin"
 aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin"
 aac_latm_decoder_select="aac_decoder aac_latm_parser"
@@ -2491,7 +2491,7 @@  nellymoser_encoder_select="audio_frame_queue mdct sinewin"
 nuv_decoder_select="idctdsp lzo"
 on2avc_decoder_select="mdct"
 opus_decoder_deps="swresample"
-opus_decoder_select="imdct15"
+opus_decoder_select="mdct15"
 png_decoder_select="zlib"
 png_encoder_select="llvidencdsp zlib"
 prores_decoder_select="blockdsp idctdsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index c4e108665d..89a27a000e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -84,7 +84,7 @@  OBJS-$(CONFIG_HUFFYUVDSP)              += huffyuvdsp.o
 OBJS-$(CONFIG_HUFFYUVENCDSP)           += huffyuvencdsp.o
 OBJS-$(CONFIG_IDCTDSP)                 += idctdsp.o simple_idct.o jrevdct.o
 OBJS-$(CONFIG_IIRFILTER)               += iirfilter.o
-OBJS-$(CONFIG_IMDCT15)                 += imdct15.o
+OBJS-$(CONFIG_MDCT15)                  += mdct15.o
 OBJS-$(CONFIG_INTRAX8)                 += intrax8.o intrax8dsp.o
 OBJS-$(CONFIG_IVIDSP)                  += ivi_dsp.o
 OBJS-$(CONFIG_JNI)                     += ffjni.o jni.o
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index b1f4aa74f0..97a2df6b86 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -36,7 +36,7 @@ 
 #include "libavutil/fixed_dsp.h"
 #include "avcodec.h"
 #if !USE_FIXED
-#include "imdct15.h"
+#include "mdct15.h"
 #endif
 #include "fft.h"
 #include "mpeg4audio.h"
@@ -327,7 +327,7 @@  struct AACContext {
 #if USE_FIXED
     AVFixedDSPContext *fdsp;
 #else
-    IMDCT15Context *mdct480;
+    MDCT15Context *mdct480;
     AVFloatDSPContext *fdsp;
 #endif /* USE_FIXED */
     int random_state;
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index ee9b4eb45f..1a10c121b9 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -42,7 +42,7 @@ 
 #include "internal.h"
 #include "get_bits.h"
 #include "fft.h"
-#include "imdct15.h"
+#include "mdct15.h"
 #include "lpc.h"
 #include "kbdwin.h"
 #include "sinewin.h"
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 83e9fb55ba..0bfd633336 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1185,7 +1185,7 @@  static av_cold int aac_decode_init(AVCodecContext *avctx)
     AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small,  8, 1, 1.0 / RANGE15(128.0));
     AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp,   11, 0, RANGE15(-2.0));
 #if !USE_FIXED
-    ret = ff_imdct15_init(&ac->mdct480, 5);
+    ret = ff_mdct15_init(&ac->mdct480, 1, 5, -1.0f);
     if (ret < 0)
         return ret;
 #endif
@@ -3192,7 +3192,7 @@  static av_cold int aac_decode_close(AVCodecContext *avctx)
     ff_mdct_end(&ac->mdct_ld);
     ff_mdct_end(&ac->mdct_ltp);
 #if !USE_FIXED
-    ff_imdct15_uninit(&ac->mdct480);
+    ff_mdct15_uninit(&ac->mdct480);
 #endif
     av_freep(&ac->fdsp);
     return 0;
diff --git a/libavcodec/imdct15.c b/libavcodec/mdct15.c
similarity index 73%
rename from libavcodec/imdct15.c
rename to libavcodec/mdct15.c
index a6d4249636..a6bea2d469 100644
--- a/libavcodec/imdct15.c
+++ b/libavcodec/mdct15.c
@@ -33,7 +33,8 @@ 
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
 
-#include "imdct15.h"
+#include "avfft.h"
+#include "mdct15.h"
 
 // complex c = a * b
 #define CMUL3(cre, cim, are, aim, bre, bim)          \
@@ -44,9 +45,9 @@  do {                                                 \
 
 #define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
 
-av_cold void ff_imdct15_uninit(IMDCT15Context **ps)
+av_cold void ff_mdct15_uninit(MDCT15Context **ps)
 {
-    IMDCT15Context *s = *ps;
+    MDCT15Context *s = *ps;
 
     if (!s)
         return;
@@ -61,10 +62,12 @@  av_cold void ff_imdct15_uninit(IMDCT15Context **ps)
     av_freep(ps);
 }
 
-static void imdct15_half(IMDCT15Context *s, float *dst, const float *src,
+static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride);
+
+static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
                          ptrdiff_t stride, float scale);
 
-static inline int init_pfa_reindex_tabs(IMDCT15Context *s)
+static inline int init_pfa_reindex_tabs(MDCT15Context *s)
 {
     int i, j;
     const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */
@@ -85,7 +88,7 @@  static inline int init_pfa_reindex_tabs(IMDCT15Context *s)
         for (j = 0; j < 15; j++) {
             const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo;
             const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo;
-            const int k_pre = 15*i + (j - q_pre*15)*l_ptwo;
+            const int k_pre = 15*i + ((j - q_pre*15) << b_ptwo);
             const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo;
             s->pfa_prereindex[i*15 + j] = k_pre;
             s->pfa_postreindex[k_post] = l_ptwo*j + i;
@@ -95,9 +98,10 @@  static inline int init_pfa_reindex_tabs(IMDCT15Context *s)
     return 0;
 }
 
-av_cold int ff_imdct15_init(IMDCT15Context **ps, int N)
+av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
 {
-    IMDCT15Context *s;
+    MDCT15Context *s;
+    double alpha, theta;
     int len2 = 15 * (1 << N);
     int len  = 2 * len2;
     int i;
@@ -113,9 +117,11 @@  av_cold int ff_imdct15_init(IMDCT15Context **ps, int N)
     s->fft_n = N - 1;
     s->len4 = len2 / 2;
     s->len2 = len2;
+    s->inverse = inverse;
+    s->mdct = mdct15;
     s->imdct_half = imdct15_half;
 
-    if (ff_fft_init(&s->ptwo_fft, N - 1, 1) < 0)
+    if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0)
         goto fail;
 
     if (init_pfa_reindex_tabs(s))
@@ -129,15 +135,20 @@  av_cold int ff_imdct15_init(IMDCT15Context **ps, int N)
     if (!s->twiddle_exptab)
         goto fail;
 
+    theta = 0.125f + (scale < 0 ? s->len4 : 0);
+    scale = sqrt(fabs(scale));
     for (i = 0; i < s->len4; i++) {
-        s->twiddle_exptab[i].re = cos(2 * M_PI * (i + 0.125f + s->len4) / len);
-        s->twiddle_exptab[i].im = sin(2 * M_PI * (i + 0.125f + s->len4) / len);
+        alpha = 2 * M_PI * (i + theta) / len;
+        s->twiddle_exptab[i].re = cos(alpha) * scale;
+        s->twiddle_exptab[i].im = sin(alpha) * scale;
     }
 
     /* 15-point FFT exptab */
     for (i = 0; i < 19; i++) {
         if (i < 15) {
             double theta = (2.0f * M_PI * i) / 15.0f;
+            if (!s->inverse)
+                theta *= -1;
             s->exptab[i].re = cos(theta);
             s->exptab[i].im = sin(theta);
         } else { /* Wrap around to simplify fft15 */
@@ -152,15 +163,17 @@  av_cold int ff_imdct15_init(IMDCT15Context **ps, int N)
     s->exptab[20].im = sin(1.0f * M_PI / 5.0f);
 
     /* Invert the phase for an inverse transform, do nothing for a forward transform */
-    s->exptab[19].im *= -1;
-    s->exptab[20].im *= -1;
+    if (s->inverse) {
+        s->exptab[19].im *= -1;
+        s->exptab[20].im *= -1;
+    }
 
     *ps = s;
 
     return 0;
 
 fail:
-    ff_imdct15_uninit(&s);
+    ff_mdct15_uninit(&s);
     return AVERROR(ENOMEM);
 }
 
@@ -211,8 +224,7 @@  static inline void fft5(const FFTComplex exptab[2], FFTComplex *out,
     out[4].im = in[0].im + z0[3].im;
 }
 
-static inline void fft15(const FFTComplex exptab[22], FFTComplex *out,
-                         const FFTComplex *in, size_t stride)
+static void fft15(const FFTComplex exptab[22], FFTComplex *out, const FFTComplex *in, size_t stride)
 {
     int k;
     FFTComplex tmp1[5], tmp2[5], tmp3[5];
@@ -241,7 +253,51 @@  static inline void fft15(const FFTComplex exptab[22], FFTComplex *out,
     }
 }
 
-static void imdct15_half(IMDCT15Context *s, float *dst, const float *src,
+static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
+{
+    int i, j;
+    const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1;
+    const int l_ptwo = 1 << s->ptwo_fft.nbits; /* number of points in each power-of-two FFT */
+    FFTComplex fft15in[15];
+
+    /* Fold src into complex samples in pfa_prereindex order, multiply each by the conjugated twiddle, then run one 15-point FFT per group */
+    for (i = 0; i < l_ptwo; i++) {
+        for (j = 0; j < 15; j++) {
+            float re, im;
+            const int k = s->pfa_prereindex[i*15 + j];
+            if (k < len8) {
+                re = -src[2*k+len3] - src[len3-1-2*k];
+                im = -src[len4+2*k] + src[len4-1-2*k];
+            } else {
+                re =  src[2*k-len4] - src[1*len3-1-2*k];
+                im = -src[2*k+len4] - src[5*len4-1-2*k];
+            }
+            CMUL3(fft15in[j].re, fft15in[j].im, re, im, s->twiddle_exptab[k].re, -s->twiddle_exptab[k].im);
+        }
+        fft15(s->exptab, s->tmp + s->ptwo_fft.revtab[i], fft15in, l_ptwo);
+    }
+
+    /* Then a 15xN FFT (where N is a power of two): one fft_calc call per l_ptwo-sized chunk of s->tmp */
+    for (i = 0; i < 15; i++)
+        s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i);
+
+    /* Reindex again via pfa_postreindex, apply twiddles (re/im swapped in operand and result) and output; coefficient n is stored at dst[n*stride] */
+    for (i = 0; i < len8; i++) {
+        float re0, im0, re1, im1;
+        const int i0 = len8 + i, i1 = len8 - i - 1;
+        const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1];
+
+        CMUL3(im1, re0, s->tmp[s1].re, s->tmp[s1].im, s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re);
+        CMUL3(im0, re1, s->tmp[s0].re, s->tmp[s0].im, s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re);
+
+        dst[2*i1*stride         ] = re0;
+        dst[2*i1*stride + stride] = im0;
+        dst[2*i0*stride         ] = re1;
+        dst[2*i0*stride + stride] = im1;
+    }
+}
+
+static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
                          ptrdiff_t stride, float scale)
 {
     FFTComplex fft15in[15];
diff --git a/libavcodec/imdct15.h b/libavcodec/mdct15.h
similarity index 66%
rename from libavcodec/imdct15.h
rename to libavcodec/mdct15.h
index a31f11e359..ef94edff6c 100644
--- a/libavcodec/imdct15.h
+++ b/libavcodec/mdct15.h
@@ -1,4 +1,6 @@ 
 /*
+ * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -16,17 +18,18 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_IMDCT15_H
-#define AVCODEC_IMDCT15_H
+#ifndef AVCODEC_MDCT15_H
+#define AVCODEC_MDCT15_H
 
 #include <stddef.h>
 
 #include "fft.h"
 
-typedef struct IMDCT15Context {
+typedef struct MDCT15Context {
     int fft_n;
     int len2;
     int len4;
+    int inverse;
     int *pfa_prereindex;
     int *pfa_postreindex;
 
@@ -40,20 +43,25 @@  typedef struct IMDCT15Context {
     FFTComplex exptab[21];
 
     /**
+     * Calculate a full 2N -> N MDCT
+     */
+    void (*mdct)(struct MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride);
+
+    /**
      * Calculate the middle half of the iMDCT
      */
-    void (*imdct_half)(struct IMDCT15Context *s, float *dst, const float *src,
+    void (*imdct_half)(struct MDCT15Context *s, float *dst, const float *src,
                        ptrdiff_t src_stride, float scale);
-} IMDCT15Context;
+} MDCT15Context;
 
 /**
- * Init an iMDCT of the length 2 * 15 * (2^N)
+ * Init an (i)MDCT of the length 2 * 15 * (2^N)
  */
-int ff_imdct15_init(IMDCT15Context **s, int N);
+int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale);
 
 /**
- * Free an iMDCT.
+ * Frees a context
  */
-void ff_imdct15_uninit(IMDCT15Context **s);
+void ff_mdct15_uninit(MDCT15Context **ps);
 
-#endif /* AVCODEC_IMDCT15_H */
+#endif /* AVCODEC_MDCT15_H */
diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c
index 96fedb7a49..a0f018e664 100644
--- a/libavcodec/opus_celt.c
+++ b/libavcodec/opus_celt.c
@@ -29,7 +29,7 @@ 
 #include "libavutil/float_dsp.h"
 #include "libavutil/libm.h"
 
-#include "imdct15.h"
+#include "mdct15.h"
 #include "opus.h"
 #include "opustab.h"
 
@@ -63,7 +63,7 @@  typedef struct CeltFrame {
 struct CeltContext {
     // constant values that do not change during context lifetime
     AVCodecContext    *avctx;
-    IMDCT15Context    *imdct[4];
+    MDCT15Context     *imdct[4];
     AVFloatDSPContext  *dsp;
     int output_channels;
 
@@ -1596,7 +1596,7 @@  int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc,
     int silence = 0;
     int transient = 0;
     int anticollapse = 0;
-    IMDCT15Context *imdct;
+    MDCT15Context *imdct;
     float imdct_scale = 1.0;
 
     if (coded_channels != 1 && coded_channels != 2) {
@@ -1792,7 +1792,7 @@  void ff_celt_free(CeltContext **ps)
         return;
 
     for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++)
-        ff_imdct15_uninit(&s->imdct[i]);
+        ff_mdct15_uninit(&s->imdct[i]);
 
     av_freep(&s->dsp);
     av_freep(ps);
@@ -1817,7 +1817,7 @@  int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels)
     s->output_channels = output_channels;
 
     for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) {
-        ret = ff_imdct15_init(&s->imdct[i], i + 3);
+        ret = ff_mdct15_init(&s->imdct[i], 1, i + 3, -1.0f);
         if (ret < 0)
             goto fail;
     }
-- 
2.11.0.483.g087da7b7c