diff mbox series

[FFmpeg-devel,1/3] lavc/aarch64: add clip N macro

Message ID 20230322000710.47513-1-jdek@itanimul.li
State Accepted
Commit 37cde570bc2dcd64a15c5d9a37b9fa0d78d84f9f
Headers show
Series [FFmpeg-devel,1/3] lavc/aarch64: add clip N macro | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

J. Dekker March 22, 2023, 12:07 a.m. UTC
Signed-off-by: J. Dekker <jdek@itanimul.li>
---
 libavcodec/aarch64/hevcdsp_idct_neon.S | 19 +++++--------------
 libavcodec/aarch64/neon.S              | 11 +++++++++++
 2 files changed, 16 insertions(+), 14 deletions(-)

Comments

Martin Storsjö March 22, 2023, 8:38 a.m. UTC | #1
On Wed, 22 Mar 2023, J. Dekker wrote:

> Signed-off-by: J. Dekker <jdek@itanimul.li>
> ---
> libavcodec/aarch64/hevcdsp_idct_neon.S | 19 +++++--------------
> libavcodec/aarch64/neon.S              | 11 +++++++++++
> 2 files changed, 16 insertions(+), 14 deletions(-)
>
> diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
> index 467cb0f48a..3e59dd20bb 100644
> --- a/libavcodec/aarch64/hevcdsp_idct_neon.S
> +++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
> @@ -5,7 +5,7 @@
>  *
>  * Ported from arm/hevcdsp_idct_neon.S by
>  * Copyright (c) 2020 Reimar Döffinger
> - * Copyright (c) 2020 J. Dekker
> + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
>  *
>  * This file is part of FFmpeg.
>  *
> @@ -38,13 +38,6 @@ const trans, align=4
>         .short          31, 22, 13, 4
> endconst
> 
> -.macro clip2 in1, in2, min, max
> -        smax            \in1, \in1, \min
> -        smax            \in2, \in2, \min
> -        smin            \in1, \in1, \max
> -        smin            \in2, \in2, \max
> -.endm
> -
> function ff_hevc_add_residual_4x4_8_neon, export=1
>         ld1             {v0.8h-v1.8h}, [x1]
>         ld1             {v2.s}[0], [x0], x2
> @@ -182,7 +175,7 @@ function hevc_add_residual_4x4_16_neon, export=0
>         ld1             {v3.d}[1], [x12], x2
>         movi            v4.8h, #0
>         sqadd           v1.8h, v1.8h, v3.8h
> -        clip2           v0.8h, v1.8h, v4.8h, v21.8h
> +        clip            v4.8h, v21.8h, v0.8h, v1.8h
>         st1             {v0.d}[0], [x0],  x2
>         st1             {v0.d}[1], [x0],  x2
>         st1             {v1.d}[0], [x0],  x2
> @@ -201,7 +194,7 @@ function hevc_add_residual_8x8_16_neon, export=0
>         sqadd           v0.8h, v0.8h, v2.8h
>         ld1             {v3.8h}, [x12]
>         sqadd           v1.8h, v1.8h, v3.8h
> -        clip2           v0.8h, v1.8h, v4.8h, v21.8h
> +        clip            v4.8h, v21.8h, v0.8h, v1.8h
>         st1             {v0.8h}, [x0],  x2
>         st1             {v1.8h}, [x12], x2
>         bne             1b
> @@ -221,8 +214,7 @@ function hevc_add_residual_16x16_16_neon, export=0
>         sqadd           v1.8h, v1.8h, v17.8h
>         sqadd           v2.8h, v2.8h, v18.8h
>         sqadd           v3.8h, v3.8h, v19.8h
> -        clip2           v0.8h, v1.8h, v20.8h, v21.8h
> -        clip2           v2.8h, v3.8h, v20.8h, v21.8h
> +        clip            v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
>         st1             {v0.8h-v1.8h}, [x0],  x2
>         st1             {v2.8h-v3.8h}, [x12], x2
>         bne             1b
> @@ -239,8 +231,7 @@ function hevc_add_residual_32x32_16_neon, export=0
>         sqadd           v1.8h, v1.8h, v17.8h
>         sqadd           v2.8h, v2.8h, v18.8h
>         sqadd           v3.8h, v3.8h, v19.8h
> -        clip2           v0.8h, v1.8h, v20.8h, v21.8h
> -        clip2           v2.8h, v3.8h, v20.8h, v21.8h
> +        clip            v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
>         st1             {v0.8h-v3.8h}, [x0], x2
>         bne             1b
>         ret
> diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
> index 1ad32c359d..bc105e4861 100644
> --- a/libavcodec/aarch64/neon.S
> +++ b/libavcodec/aarch64/neon.S
> @@ -1,6 +1,8 @@
> /*
>  * This file is part of FFmpeg.
>  *
> + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
> + *
>  * FFmpeg is free software; you can redistribute it and/or
>  * modify it under the terms of the GNU Lesser General Public
>  * License as published by the Free Software Foundation; either
> @@ -16,6 +18,15 @@
>  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>  */
> 
> +.macro clip min, max, regs:vararg
> +.irp x, \regs
> +        smax            \x, \x, \min
> +.endr
> +.irp x, \regs
> +        smin            \x, \x, \max
> +.endr
> +.endm
> +

LGTM, the vararg argument handling looks neat here.

// Martin
diff mbox series

Patch

diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
index 467cb0f48a..3e59dd20bb 100644
--- a/libavcodec/aarch64/hevcdsp_idct_neon.S
+++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
@@ -5,7 +5,7 @@ 
  *
  * Ported from arm/hevcdsp_idct_neon.S by
  * Copyright (c) 2020 Reimar Döffinger
- * Copyright (c) 2020 J. Dekker
+ * Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
  *
  * This file is part of FFmpeg.
  *
@@ -38,13 +38,6 @@  const trans, align=4
         .short          31, 22, 13, 4
 endconst
 
-.macro clip2 in1, in2, min, max
-        smax            \in1, \in1, \min
-        smax            \in2, \in2, \min
-        smin            \in1, \in1, \max
-        smin            \in2, \in2, \max
-.endm
-
 function ff_hevc_add_residual_4x4_8_neon, export=1
         ld1             {v0.8h-v1.8h}, [x1]
         ld1             {v2.s}[0], [x0], x2
@@ -182,7 +175,7 @@  function hevc_add_residual_4x4_16_neon, export=0
         ld1             {v3.d}[1], [x12], x2
         movi            v4.8h, #0
         sqadd           v1.8h, v1.8h, v3.8h
-        clip2           v0.8h, v1.8h, v4.8h, v21.8h
+        clip            v4.8h, v21.8h, v0.8h, v1.8h
         st1             {v0.d}[0], [x0],  x2
         st1             {v0.d}[1], [x0],  x2
         st1             {v1.d}[0], [x0],  x2
@@ -201,7 +194,7 @@  function hevc_add_residual_8x8_16_neon, export=0
         sqadd           v0.8h, v0.8h, v2.8h
         ld1             {v3.8h}, [x12]
         sqadd           v1.8h, v1.8h, v3.8h
-        clip2           v0.8h, v1.8h, v4.8h, v21.8h
+        clip            v4.8h, v21.8h, v0.8h, v1.8h
         st1             {v0.8h}, [x0],  x2
         st1             {v1.8h}, [x12], x2
         bne             1b
@@ -221,8 +214,7 @@  function hevc_add_residual_16x16_16_neon, export=0
         sqadd           v1.8h, v1.8h, v17.8h
         sqadd           v2.8h, v2.8h, v18.8h
         sqadd           v3.8h, v3.8h, v19.8h
-        clip2           v0.8h, v1.8h, v20.8h, v21.8h
-        clip2           v2.8h, v3.8h, v20.8h, v21.8h
+        clip            v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
         st1             {v0.8h-v1.8h}, [x0],  x2
         st1             {v2.8h-v3.8h}, [x12], x2
         bne             1b
@@ -239,8 +231,7 @@  function hevc_add_residual_32x32_16_neon, export=0
         sqadd           v1.8h, v1.8h, v17.8h
         sqadd           v2.8h, v2.8h, v18.8h
         sqadd           v3.8h, v3.8h, v19.8h
-        clip2           v0.8h, v1.8h, v20.8h, v21.8h
-        clip2           v2.8h, v3.8h, v20.8h, v21.8h
+        clip            v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
         st1             {v0.8h-v3.8h}, [x0], x2
         bne             1b
         ret
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index 1ad32c359d..bc105e4861 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -1,6 +1,8 @@ 
 /*
  * This file is part of FFmpeg.
  *
+ * Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
+ *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
@@ -16,6 +18,15 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+.macro clip min, max, regs:vararg
+.irp x, \regs
+        smax            \x, \x, \min
+.endr
+.irp x, \regs
+        smin            \x, \x, \max
+.endr
+.endm
+
 .macro  transpose_8x8B  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
         trn1            \r8\().8B,  \r0\().8B,  \r1\().8B
         trn2            \r9\().8B,  \r0\().8B,  \r1\().8B