Message ID | 20171126225111.5108-6-james.darnley@gmail.com |
---|---|
State | New |
Headers | show |
On 26 November 2017 at 22:51, James Darnley <james.darnley@gmail.com> wrote:
> ---
>  libavcodec/x86/flac_dsp_gpl.asm | 40 ++++++++++++++++++++--------------------
>  1 file changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
> index 4d212ed212..952fc8b86b 100644
> --- a/libavcodec/x86/flac_dsp_gpl.asm
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -75,42 +75,42 @@ neg orderq
>  %if cpuflag(avx)
>  vbroadcastss m2, [coefsq+posj*4]
>  %else
> - movd m2, [coefsq+posj*4] ; c = coefs[j]
> - SPLATD m2
> + movd m2, [coefsq+posj*4] ; c = coefs[j]
> + SPLATD m2
>  %endif
>  %if cpuflag(avx)
> - vpmulld m1, m2, [smpq+negj*4-4]
> - vpmulld m5, m2, [smpq+negj*4-4+mmsize]
> - vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
> - vpaddd m0, m1
> - vpaddd m4, m5
> - vpaddd m6, m7
> + vpmulld m1, m2, [smpq+negj*4-4]
> + vpmulld m5, m2, [smpq+negj*4-4+mmsize]
> + vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
> + vpaddd m0, m1
> + vpaddd m4, m5
> + vpaddd m6, m7
>  %else
> - movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
> - movu m5, [smpq+negj*4-4+mmsize]
> - movu m7, [smpq+negj*4-4+mmsize*2]
> - pmulld m1, m2
> - pmulld m5, m2
> - pmulld m7, m2
> - paddd m0, m1 ; p += c * s
> - paddd m4, m5
> - paddd m6, m7
> + movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
> + movu m5, [smpq+negj*4-4+mmsize]
> + movu m7, [smpq+negj*4-4+mmsize*2]
> + pmulld m1, m2
> + pmulld m5, m2
> + pmulld m7, m2
> + paddd m0, m1 ; p += c * s
> + paddd m4, m5
> + paddd m6, m7
>  %endif
>
>  dec negj
>  inc posj
>  jnz .looporder
>
> - psrad m0, xm3 ; p >>= shift
> + psrad m0, xm3 ; p >>= shift
>  psrad m4, xm3
>  psrad m6, xm3
>  movu m1, [smpq]
>  movu m5, [smpq+mmsize]
>  movu m7, [smpq+mmsize*2]
> - psubd m1, m0 ; smp[i] - p
> + psubd m1, m0 ; smp[i] - p
>  psubd m5, m4
>  psubd m7, m6
> - movu [resq], m1 ; res[i] = smp[i] - (p >> shift)
> + movu [resq], m1 ; res[i] = smp[i] - (p >> shift)
>  movu [resq+mmsize], m5
>  movu [resq+mmsize*2], m7
>
> --
> 2.15.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

LGTM, should have just pushed this.
diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index 4d212ed212..952fc8b86b 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -75,42 +75,42 @@ neg orderq
 %if cpuflag(avx)
 vbroadcastss m2, [coefsq+posj*4]
 %else
- movd m2, [coefsq+posj*4] ; c = coefs[j]
- SPLATD m2
+ movd m2, [coefsq+posj*4] ; c = coefs[j]
+ SPLATD m2
 %endif
 %if cpuflag(avx)
- vpmulld m1, m2, [smpq+negj*4-4]
- vpmulld m5, m2, [smpq+negj*4-4+mmsize]
- vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
- vpaddd m0, m1
- vpaddd m4, m5
- vpaddd m6, m7
+ vpmulld m1, m2, [smpq+negj*4-4]
+ vpmulld m5, m2, [smpq+negj*4-4+mmsize]
+ vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
+ vpaddd m0, m1
+ vpaddd m4, m5
+ vpaddd m6, m7
 %else
- movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
- movu m5, [smpq+negj*4-4+mmsize]
- movu m7, [smpq+negj*4-4+mmsize*2]
- pmulld m1, m2
- pmulld m5, m2
- pmulld m7, m2
- paddd m0, m1 ; p += c * s
- paddd m4, m5
- paddd m6, m7
+ movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
+ movu m5, [smpq+negj*4-4+mmsize]
+ movu m7, [smpq+negj*4-4+mmsize*2]
+ pmulld m1, m2
+ pmulld m5, m2
+ pmulld m7, m2
+ paddd m0, m1 ; p += c * s
+ paddd m4, m5
+ paddd m6, m7
 %endif

 dec negj
 inc posj
 jnz .looporder

- psrad m0, xm3 ; p >>= shift
+ psrad m0, xm3 ; p >>= shift
 psrad m4, xm3
 psrad m6, xm3
 movu m1, [smpq]
 movu m5, [smpq+mmsize]
 movu m7, [smpq+mmsize*2]
- psubd m1, m0 ; smp[i] - p
+ psubd m1, m0 ; smp[i] - p
 psubd m5, m4
 psubd m7, m6
- movu [resq], m1 ; res[i] = smp[i] - (p >> shift)
+ movu [resq], m1 ; res[i] = smp[i] - (p >> shift)
 movu [resq+mmsize], m5
 movu [resq+mmsize*2], m7