diff mbox

[FFmpeg-devel] opusenc: use float_dsp for transient mdcts

Message ID 20170712035127.6917-1-atomnuker@gmail.com
State New
Headers show

Commit Message

Rostislav Pehlivanov July 12, 2017, 3:51 a.m. UTC
vector_fmul_reverse requires a length that is a multiple of 16, so the window is padded with 8 zeros at the front (120 -> 128 elements)

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
---
 libavcodec/opus_celt.h | 4 ++--
 libavcodec/opusenc.c   | 8 +++-----
 libavcodec/opustab.c   | 8 ++++++--
 libavcodec/opustab.h   | 3 +--
 4 files changed, 12 insertions(+), 11 deletions(-)

Comments

Rostislav Pehlivanov July 13, 2017, 6:54 p.m. UTC | #1
On 12 July 2017 at 04:51, Rostislav Pehlivanov <atomnuker@gmail.com> wrote:

> vector_fmul_reverse requires a length that is a multiple of 16, so the window is padded with 8 zeros at the front (120 -> 128 elements)
>
> Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
> ---
>  libavcodec/opus_celt.h | 4 ++--
>  libavcodec/opusenc.c   | 8 +++-----
>  libavcodec/opustab.c   | 8 ++++++--
>  libavcodec/opustab.h   | 3 +--
>  4 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h
> index b80ade84f2..62aee359f6 100644
> --- a/libavcodec/opus_celt.h
> +++ b/libavcodec/opus_celt.h
> @@ -75,8 +75,8 @@ typedef struct CeltBlock {
>      DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE];
>
>      /* Used by the encoder */
> -    DECLARE_ALIGNED(32, float, overlap)[120];
> -    DECLARE_ALIGNED(32, float, samples)[CELT_MAX_FRAME_SIZE];
> +    DECLARE_ALIGNED(32, float, overlap)[CELT_OVERLAP];
> +    DECLARE_ALIGNED(32, float, samples)[FFALIGN(CELT_MAX_FRAME_SIZE, 8)];
>
>      /* postfilter parameters */
>      int   pf_period_new;
> diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
> index 8aba291e7e..6cefd33884 100644
> --- a/libavcodec/opusenc.c
> +++ b/libavcodec/opusenc.c
> @@ -210,17 +210,15 @@ static void celt_frame_mdct(OpusEncContext *s,
> CeltFrame *f)
>      int i, t, ch;
>      float *win = s->scratch;
>
> -    /* I think I can use s->dsp->vector_fmul_window for transients at
> least */
>      if (f->transient) {
>          for (ch = 0; ch < f->channels; ch++) {
>              CeltBlock *b = &f->block[ch];
>              float *src1 = b->overlap;
>              for (t = 0; t < f->blocks; t++) {
>                  float *src2 = &b->samples[CELT_OVERLAP*t];
> -                for (i = 0; i < CELT_OVERLAP; i++) {
> -                    win[               i] = src1[i]*ff_celt_window[i];
> -                    win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP
> - i - 1];
> -                }
> +                s->dsp->vector_fmul(win, src1, ff_celt_window,
> CELT_OVERLAP);
> +                s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
> +                                            ff_celt_window - 8,
> CELT_OVERLAP + 8);
>                  src1 = src2;
>                  s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win,
> f->blocks);
>              }
> diff --git a/libavcodec/opustab.c b/libavcodec/opustab.c
> index 635cc363e2..1f3049676e 100644
> --- a/libavcodec/opustab.c
> +++ b/libavcodec/opustab.c
> @@ -1096,7 +1096,9 @@ const float ff_celt_postfilter_taps[3][3] = {
>      { 0.7998046875f, 0.1000976562f, 0.0           }
>  };
>
> -DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = {
> +DECLARE_ALIGNED(32, static const float, ff_celt_window_padded)[128] = {
> +    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
> +    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
>      6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f,
> 0.0054439943f,
>      0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
>      0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
> @@ -1120,9 +1122,11 @@ DECLARE_ALIGNED(32, const float,
> ff_celt_window)[120] = {
>      0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
>      0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
>      0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
> -    0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
> +    0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.00000000f,
>  };
>
> +const float *ff_celt_window = &ff_celt_window_padded[8];
> +
>  /* square of the window, used for the postfilter */
>  const float ff_celt_window2[120] = {
>      4.5275357e-09f, 3.66647e-07f, 2.82777e-06f, 1.08557e-05f,
> 2.96371e-05f, 6.60594e-05f,
> diff --git a/libavcodec/opustab.h b/libavcodec/opustab.h
> index b4589869ef..bce5a42830 100644
> --- a/libavcodec/opustab.h
> +++ b/libavcodec/opustab.h
> @@ -154,8 +154,7 @@ extern const uint32_t ff_celt_pvq_u[1272];
>  extern const float    ff_celt_postfilter_taps[3][3];
>
>  extern const float    ff_celt_window2[120];
> -
> -DECLARE_ALIGNED(32, extern const float, ff_celt_window)[120];
> +extern const float   *ff_celt_window;
>
>  extern const uint32_t * const ff_celt_pvq_u_row[15];
>
> --
> 2.13.2
>
>
Pushed
diff mbox

Patch

diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h
index b80ade84f2..62aee359f6 100644
--- a/libavcodec/opus_celt.h
+++ b/libavcodec/opus_celt.h
@@ -75,8 +75,8 @@  typedef struct CeltBlock {
     DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE];
 
     /* Used by the encoder */
-    DECLARE_ALIGNED(32, float, overlap)[120];
-    DECLARE_ALIGNED(32, float, samples)[CELT_MAX_FRAME_SIZE];
+    DECLARE_ALIGNED(32, float, overlap)[CELT_OVERLAP];
+    DECLARE_ALIGNED(32, float, samples)[FFALIGN(CELT_MAX_FRAME_SIZE, 8)];
 
     /* postfilter parameters */
     int   pf_period_new;
diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
index 8aba291e7e..6cefd33884 100644
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c
@@ -210,17 +210,15 @@  static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
     int i, t, ch;
     float *win = s->scratch;
 
-    /* I think I can use s->dsp->vector_fmul_window for transients at least */
     if (f->transient) {
         for (ch = 0; ch < f->channels; ch++) {
             CeltBlock *b = &f->block[ch];
             float *src1 = b->overlap;
             for (t = 0; t < f->blocks; t++) {
                 float *src2 = &b->samples[CELT_OVERLAP*t];
-                for (i = 0; i < CELT_OVERLAP; i++) {
-                    win[               i] = src1[i]*ff_celt_window[i];
-                    win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP - i - 1];
-                }
+                s->dsp->vector_fmul(win, src1, ff_celt_window, CELT_OVERLAP);
+                s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
+                                            ff_celt_window - 8, CELT_OVERLAP + 8);
                 src1 = src2;
                 s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
             }
diff --git a/libavcodec/opustab.c b/libavcodec/opustab.c
index 635cc363e2..1f3049676e 100644
--- a/libavcodec/opustab.c
+++ b/libavcodec/opustab.c
@@ -1096,7 +1096,9 @@  const float ff_celt_postfilter_taps[3][3] = {
     { 0.7998046875f, 0.1000976562f, 0.0           }
 };
 
-DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = {
+DECLARE_ALIGNED(32, static const float, ff_celt_window_padded)[128] = {
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
     6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f,
     0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
     0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
@@ -1120,9 +1122,11 @@  DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = {
     0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
     0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
     0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
-    0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
+    0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.00000000f,
 };
 
+const float *ff_celt_window = &ff_celt_window_padded[8];
+
 /* square of the window, used for the postfilter */
 const float ff_celt_window2[120] = {
     4.5275357e-09f, 3.66647e-07f, 2.82777e-06f, 1.08557e-05f, 2.96371e-05f, 6.60594e-05f,
diff --git a/libavcodec/opustab.h b/libavcodec/opustab.h
index b4589869ef..bce5a42830 100644
--- a/libavcodec/opustab.h
+++ b/libavcodec/opustab.h
@@ -154,8 +154,7 @@  extern const uint32_t ff_celt_pvq_u[1272];
 extern const float    ff_celt_postfilter_taps[3][3];
 
 extern const float    ff_celt_window2[120];
-
-DECLARE_ALIGNED(32, extern const float, ff_celt_window)[120];
+extern const float   *ff_celt_window;
 
 extern const uint32_t * const ff_celt_pvq_u_row[15];