diff mbox series

[FFmpeg-devel,3/7] avcodec/aarch64/mpegvideoencdsp: add dotprod implementation for pix_norm1

Message ID 20240818201326.100492-3-ramiro.polla@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/7] checkasm/mpegvideoencdsp: add pix_sum and pix_norm1 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Ramiro Polla Aug. 18, 2024, 8:13 p.m. UTC
                     A76
pix_norm1_c:        231.5
pix_norm1_neon:      44.2 ( 5.24x)
pix_norm1_dotprod:   20.7 (11.18x)
---
 libavcodec/aarch64/mpegvideoencdsp_init.c | 10 ++++++++
 libavcodec/aarch64/mpegvideoencdsp_neon.S | 28 +++++++++++++++++++++++
 2 files changed, 38 insertions(+)

Comments

Martin Storsjö Aug. 18, 2024, 8:44 p.m. UTC | #1
On Sun, 18 Aug 2024, Ramiro Polla wrote:

>                      A76
> pix_norm1_c:        231.5
> pix_norm1_neon:      44.2 ( 5.24x)
> pix_norm1_dotprod:   20.7 (11.18x)
> ---
> libavcodec/aarch64/mpegvideoencdsp_init.c | 10 ++++++++
> libavcodec/aarch64/mpegvideoencdsp_neon.S | 28 +++++++++++++++++++++++
> 2 files changed, 38 insertions(+)
>
> diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c
> index 7eb632ed1b..d0ce07e178 100644
> --- a/libavcodec/aarch64/mpegvideoencdsp_init.c
> +++ b/libavcodec/aarch64/mpegvideoencdsp_init.c
> @@ -27,6 +27,10 @@
> int ff_pix_sum16_neon(const uint8_t *pix, int line_size);
> int ff_pix_norm1_neon(const uint8_t *pix, int line_size);
>
> +#if HAVE_DOTPROD
> +int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size);
> +#endif
> +
> av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
>                                              AVCodecContext *avctx)
> {
> @@ -36,4 +40,10 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
>         c->pix_sum   = ff_pix_sum16_neon;
>         c->pix_norm1 = ff_pix_norm1_neon;
>     }
> +
> +#if HAVE_DOTPROD
> +    if (have_dotprod(cpu_flags)) {
> +        c->pix_norm1 = ff_pix_norm1_neon_dotprod;
> +    }
> +#endif
> }
> diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S
> index 89e50e29b3..eccbdd850f 100644
> --- a/libavcodec/aarch64/mpegvideoencdsp_neon.S
> +++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S
> @@ -65,3 +65,31 @@ function ff_pix_norm1_neon, export=1
>
>         ret
> endfunc
> +
> +#if HAVE_DOTPROD
> +ENABLE_DOTPROD
> +
> +function ff_pix_norm1_neon_dotprod, export=1
> +// x0  const uint8_t *pix
> +// x1  int line_size
> +
> +        sxtw            x1, w1
> +        movi            v0.16b, #0
> +        mov             w2, #16
> +
> +1:
> +        ld1             { v1.16b }, [x0], x1
> +        ld1             { v2.16b }, [x0], x1

Nit: please drop the spaces inside the {}, i.e. write {v1.16b} rather than { v1.16b }.

> +        udot            v0.4s, v1.16b, v1.16b
> +        subs            w2, w2, #2
> +        udot            v0.4s, v2.16b, v2.16b
> +        b.ne            1b
> +
> +        uaddlv          d0, v0.4s
> +        fmov            w0, s0
> +
> +        ret
> +endfunc

This implementation LGTM otherwise

// Martin
diff mbox series

Patch

diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c
index 7eb632ed1b..d0ce07e178 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_init.c
+++ b/libavcodec/aarch64/mpegvideoencdsp_init.c
@@ -27,6 +27,10 @@ 
 int ff_pix_sum16_neon(const uint8_t *pix, int line_size);
 int ff_pix_norm1_neon(const uint8_t *pix, int line_size);
 
+#if HAVE_DOTPROD
+int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size);
+#endif
+
 av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
                                              AVCodecContext *avctx)
 {
@@ -36,4 +40,10 @@  av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
         c->pix_sum   = ff_pix_sum16_neon;
         c->pix_norm1 = ff_pix_norm1_neon;
     }
+
+#if HAVE_DOTPROD
+    if (have_dotprod(cpu_flags)) {
+        c->pix_norm1 = ff_pix_norm1_neon_dotprod;
+    }
+#endif
 }
diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S
index 89e50e29b3..eccbdd850f 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_neon.S
+++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S
@@ -65,3 +65,31 @@  function ff_pix_norm1_neon, export=1
 
         ret
 endfunc
+
+#if HAVE_DOTPROD
+ENABLE_DOTPROD
+
+function ff_pix_norm1_neon_dotprod, export=1
+// x0  const uint8_t *pix      (start of the block; 16 rows of 16 bytes are read)
+// x1  int line_size           (byte step from one row to the next)
+
+        sxtw            x1, w1                          // widen the 32-bit int stride for use as a 64-bit post-index
+        movi            v0.16b, #0                      // v0 = four 32-bit partial sums, cleared
+        mov             w2, #16                         // w2 = rows left to process
+
+1:
+        ld1             { v1.16b }, [x0], x1            // load one 16-byte row, then pix += line_size
+        ld1             { v2.16b }, [x0], x1            // load the following row the same way
+        udot            v0.4s, v1.16b, v1.16b           // each lane += sum of squares of its 4 bytes
+        subs            w2, w2, #2                      // two rows per iteration; sets flags for b.ne
+        udot            v0.4s, v2.16b, v2.16b           // same accumulation for the second row
+        b.ne            1b                              // repeat until the row counter hits zero
+
+        uaddlv          d0, v0.4s                       // add the four lane sums into one 64-bit total
+        fmov            w0, s0                          // return value = low 32 bits of that total
+
+        ret
+endfunc
+
+DISABLE_DOTPROD
+#endif