Message ID | 20240818201326.100492-3-ramiro.polla@gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/7] checkasm/mpegvideoencdsp: add pix_sum and pix_norm1 | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Sun, 18 Aug 2024, Ramiro Polla wrote:

> A76
> pix_norm1_c:       231.5
> pix_norm1_neon:     44.2 ( 5.24x)
> pix_norm1_dotprod:  20.7 (11.18x)
> ---
>  libavcodec/aarch64/mpegvideoencdsp_init.c | 10 ++++++++
>  libavcodec/aarch64/mpegvideoencdsp_neon.S | 28 +++++++++++++++++++++++
>  2 files changed, 38 insertions(+)
>
> diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c
> index 7eb632ed1b..d0ce07e178 100644
> --- a/libavcodec/aarch64/mpegvideoencdsp_init.c
> +++ b/libavcodec/aarch64/mpegvideoencdsp_init.c
> @@ -27,6 +27,10 @@
>  int ff_pix_sum16_neon(const uint8_t *pix, int line_size);
>  int ff_pix_norm1_neon(const uint8_t *pix, int line_size);
>
> +#if HAVE_DOTPROD
> +int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size);
> +#endif
> +
>  av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
>                                               AVCodecContext *avctx)
>  {
> @@ -36,4 +40,10 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
>          c->pix_sum   = ff_pix_sum16_neon;
>          c->pix_norm1 = ff_pix_norm1_neon;
>      }
> +
> +#if HAVE_DOTPROD
> +    if (have_dotprod(cpu_flags)) {
> +        c->pix_norm1 = ff_pix_norm1_neon_dotprod;
> +    }
> +#endif
>  }
> diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S
> index 89e50e29b3..eccbdd850f 100644
> --- a/libavcodec/aarch64/mpegvideoencdsp_neon.S
> +++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S
> @@ -65,3 +65,31 @@ function ff_pix_norm1_neon, export=1
>
>          ret
>  endfunc
> +
> +#if HAVE_DOTPROD
> +ENABLE_DOTPROD
> +
> +function ff_pix_norm1_neon_dotprod, export=1
> +// x0  const uint8_t *pix
> +// x1  int line_size
> +
> +        sxtw            x1, w1
> +        movi            v0.16b, #0
> +        mov             w2, #16
> +
> +1:
> +        ld1             { v1.16b }, [x0], x1
> +        ld1             { v2.16b }, [x0], x1

Nit, spaces inside of {}

> +        udot            v0.4s, v1.16b, v1.16b
> +        subs            w2, w2, #2
> +        udot            v0.4s, v2.16b, v2.16b
> +        b.ne            1b
> +
> +        uaddlv          d0, v0.4s
> +        fmov            w0, s0
> +
> +        ret
> +endfunc

This implementation LGTM otherwise

// Martin
```diff
diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c
index 7eb632ed1b..d0ce07e178 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_init.c
+++ b/libavcodec/aarch64/mpegvideoencdsp_init.c
@@ -27,6 +27,10 @@
 int ff_pix_sum16_neon(const uint8_t *pix, int line_size);
 int ff_pix_norm1_neon(const uint8_t *pix, int line_size);
 
+#if HAVE_DOTPROD
+int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size);
+#endif
+
 av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
                                              AVCodecContext *avctx)
 {
@@ -36,4 +40,10 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
         c->pix_sum   = ff_pix_sum16_neon;
         c->pix_norm1 = ff_pix_norm1_neon;
     }
+
+#if HAVE_DOTPROD
+    if (have_dotprod(cpu_flags)) {
+        c->pix_norm1 = ff_pix_norm1_neon_dotprod;
+    }
+#endif
 }
diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S
index 89e50e29b3..eccbdd850f 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_neon.S
+++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S
@@ -65,3 +65,31 @@ function ff_pix_norm1_neon, export=1
 
         ret
 endfunc
+
+#if HAVE_DOTPROD
+ENABLE_DOTPROD
+
+function ff_pix_norm1_neon_dotprod, export=1
+// x0  const uint8_t *pix
+// x1  int line_size
+
+        sxtw            x1, w1
+        movi            v0.16b, #0
+        mov             w2, #16
+
+1:
+        ld1             { v1.16b }, [x0], x1
+        ld1             { v2.16b }, [x0], x1
+        udot            v0.4s, v1.16b, v1.16b
+        subs            w2, w2, #2
+        udot            v0.4s, v2.16b, v2.16b
+        b.ne            1b
+
+        uaddlv          d0, v0.4s
+        fmov            w0, s0
+
+        ret
+endfunc
+
+DISABLE_DOTPROD
+#endif
```