Message ID | 20170712035127.6917-1-atomnuker@gmail.com |
---|---|
State | New |
Headers | show |
On 12 July 2017 at 04:51, Rostislav Pehlivanov <atomnuker@gmail.com> wrote: > vector_fmul_reverse requires padding the window at the front > > Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com> > --- > libavcodec/opus_celt.h | 4 ++-- > libavcodec/opusenc.c | 8 +++----- > libavcodec/opustab.c | 8 ++++++-- > libavcodec/opustab.h | 3 +-- > 4 files changed, 12 insertions(+), 11 deletions(-) > > diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h > index b80ade84f2..62aee359f6 100644 > --- a/libavcodec/opus_celt.h > +++ b/libavcodec/opus_celt.h > @@ -75,8 +75,8 @@ typedef struct CeltBlock { > DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE]; > > /* Used by the encoder */ > - DECLARE_ALIGNED(32, float, overlap)[120]; > - DECLARE_ALIGNED(32, float, samples)[CELT_MAX_FRAME_SIZE]; > + DECLARE_ALIGNED(32, float, overlap)[CELT_OVERLAP]; > + DECLARE_ALIGNED(32, float, samples)[FFALIGN(CELT_MAX_FRAME_SIZE, 8)]; > > /* postfilter parameters */ > int pf_period_new; > diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c > index 8aba291e7e..6cefd33884 100644 > --- a/libavcodec/opusenc.c > +++ b/libavcodec/opusenc.c > @@ -210,17 +210,15 @@ static void celt_frame_mdct(OpusEncContext *s, > CeltFrame *f) > int i, t, ch; > float *win = s->scratch; > > - /* I think I can use s->dsp->vector_fmul_window for transients at > least */ > if (f->transient) { > for (ch = 0; ch < f->channels; ch++) { > CeltBlock *b = &f->block[ch]; > float *src1 = b->overlap; > for (t = 0; t < f->blocks; t++) { > float *src2 = &b->samples[CELT_OVERLAP*t]; > - for (i = 0; i < CELT_OVERLAP; i++) { > - win[ i] = src1[i]*ff_celt_window[i]; > - win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP > - i - 1]; > - } > + s->dsp->vector_fmul(win, src1, ff_celt_window, > CELT_OVERLAP); > + s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2, > + ff_celt_window - 8, > CELT_OVERLAP + 8); > src1 = src2; > s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, > f->blocks); > } > diff --git 
a/libavcodec/opustab.c b/libavcodec/opustab.c > index 635cc363e2..1f3049676e 100644 > --- a/libavcodec/opustab.c > +++ b/libavcodec/opustab.c > @@ -1096,7 +1096,9 @@ const float ff_celt_postfilter_taps[3][3] = { > { 0.7998046875f, 0.1000976562f, 0.0 } > }; > > -DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = { > +DECLARE_ALIGNED(32, static const float, ff_celt_window_padded)[128] = { > + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, > + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, > 6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, > 0.0054439943f, > 0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f, > 0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f, > @@ -1120,9 +1122,11 @@ DECLARE_ALIGNED(32, const float, > ff_celt_window)[120] = { > 0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f, > 0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f, > 0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f, > - 0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f, > + 0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.00000000f, > }; > > +const float *ff_celt_window = &ff_celt_window_padded[8]; > + > /* square of the window, used for the postfilter */ > const float ff_celt_window2[120] = { > 4.5275357e-09f, 3.66647e-07f, 2.82777e-06f, 1.08557e-05f, > 2.96371e-05f, 6.60594e-05f, > diff --git a/libavcodec/opustab.h b/libavcodec/opustab.h > index b4589869ef..bce5a42830 100644 > --- a/libavcodec/opustab.h > +++ b/libavcodec/opustab.h > @@ -154,8 +154,7 @@ extern const uint32_t ff_celt_pvq_u[1272]; > extern const float ff_celt_postfilter_taps[3][3]; > > extern const float ff_celt_window2[120]; > - > -DECLARE_ALIGNED(32, extern const float, ff_celt_window)[120]; > +extern const float *ff_celt_window; > > extern const uint32_t * const ff_celt_pvq_u_row[15]; > > -- > 2.13.2 > > Pushed
diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h index b80ade84f2..62aee359f6 100644 --- a/libavcodec/opus_celt.h +++ b/libavcodec/opus_celt.h @@ -75,8 +75,8 @@ typedef struct CeltBlock { DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE]; /* Used by the encoder */ - DECLARE_ALIGNED(32, float, overlap)[120]; - DECLARE_ALIGNED(32, float, samples)[CELT_MAX_FRAME_SIZE]; + DECLARE_ALIGNED(32, float, overlap)[CELT_OVERLAP]; + DECLARE_ALIGNED(32, float, samples)[FFALIGN(CELT_MAX_FRAME_SIZE, 8)]; /* postfilter parameters */ int pf_period_new; diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c index 8aba291e7e..6cefd33884 100644 --- a/libavcodec/opusenc.c +++ b/libavcodec/opusenc.c @@ -210,17 +210,15 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f) int i, t, ch; float *win = s->scratch; - /* I think I can use s->dsp->vector_fmul_window for transients at least */ if (f->transient) { for (ch = 0; ch < f->channels; ch++) { CeltBlock *b = &f->block[ch]; float *src1 = b->overlap; for (t = 0; t < f->blocks; t++) { float *src2 = &b->samples[CELT_OVERLAP*t]; - for (i = 0; i < CELT_OVERLAP; i++) { - win[ i] = src1[i]*ff_celt_window[i]; - win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP - i - 1]; - } + s->dsp->vector_fmul(win, src1, ff_celt_window, CELT_OVERLAP); + s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2, + ff_celt_window - 8, CELT_OVERLAP + 8); src1 = src2; s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks); } diff --git a/libavcodec/opustab.c b/libavcodec/opustab.c index 635cc363e2..1f3049676e 100644 --- a/libavcodec/opustab.c +++ b/libavcodec/opustab.c @@ -1096,7 +1096,9 @@ const float ff_celt_postfilter_taps[3][3] = { { 0.7998046875f, 0.1000976562f, 0.0 } }; -DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = { +DECLARE_ALIGNED(32, static const float, ff_celt_window_padded)[128] = { + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 6.7286966e-05f, 
0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f, 0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f, 0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f, @@ -1120,9 +1122,11 @@ DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = { 0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f, 0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f, 0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f, - 0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f, + 0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.00000000f, }; +const float *ff_celt_window = &ff_celt_window_padded[8]; + /* square of the window, used for the postfilter */ const float ff_celt_window2[120] = { 4.5275357e-09f, 3.66647e-07f, 2.82777e-06f, 1.08557e-05f, 2.96371e-05f, 6.60594e-05f, diff --git a/libavcodec/opustab.h b/libavcodec/opustab.h index b4589869ef..bce5a42830 100644 --- a/libavcodec/opustab.h +++ b/libavcodec/opustab.h @@ -154,8 +154,7 @@ extern const uint32_t ff_celt_pvq_u[1272]; extern const float ff_celt_postfilter_taps[3][3]; extern const float ff_celt_window2[120]; - -DECLARE_ALIGNED(32, extern const float, ff_celt_window)[120]; +extern const float *ff_celt_window; extern const uint32_t * const ff_celt_pvq_u_row[15];
vector_fmul_reverse requires padding the window at the front

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
---
 libavcodec/opus_celt.h | 4 ++--
 libavcodec/opusenc.c   | 8 +++-----
 libavcodec/opustab.c   | 8 ++++++--
 libavcodec/opustab.h   | 3 +--
 4 files changed, 12 insertions(+), 11 deletions(-)