diff mbox series

[FFmpeg-devel,2/2] aarch64: me_cmp: Avoid using the non-unrolled codepath for the minimum unroll size

Message ID 20220928091334.7838-2-martin@martin.st
State Accepted
Commit 8089fe072e4552348a215d9fb4a0545ccf830763
Headers show
Series [FFmpeg-devel,1/2] aarch64: me_cmp: Avoid redundant loads in ff_pix_abs16_y2_neon | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Martin Storsjö Sept. 28, 2022, 9:13 a.m. UTC
Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/aarch64/me_cmp_neon.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Comments

Hubert Mazur Sept. 28, 2022, 9:19 a.m. UTC | #1
LGTM.

On Wed, Sep 28, 2022 at 11:13 AM Martin Storsjö <martin@martin.st> wrote:

> Signed-off-by: Martin Storsjö <martin@martin.st>
> ---
>  libavcodec/aarch64/me_cmp_neon.S | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/libavcodec/aarch64/me_cmp_neon.S b/libavcodec/aarch64/me_cmp_neon.S
> index 832a7cb22d..c710358ab7 100644
> --- a/libavcodec/aarch64/me_cmp_neon.S
> +++ b/libavcodec/aarch64/me_cmp_neon.S
> @@ -471,7 +471,7 @@ function sse8_neon, export=1
>          movi            v21.4s, #0
>          movi            v20.4s, #0
>          cmp             w4, #4
> -        b.le            2f
> +        b.lt            2f
>
>  // make 4 iterations at once
>  1:
> @@ -534,7 +534,7 @@ function sse4_neon, export=1
>
>          movi            v16.4s, #0                      // clear the result accumulator
>          cmp             w4, #4
> -        b.le            2f
> +        b.lt            2f
>
>  // make 4 iterations at once
>  1:
> @@ -663,7 +663,7 @@ function vsse16_neon, export=1
>          cmp             w4, #3                          // check if we can make 3 iterations at once
>          usubl           v31.8h, v0.8b, v1.8b            // Signed difference of pix1[0] - pix2[0], first iteration
>          usubl2          v30.8h, v0.16b, v1.16b          // Signed difference of pix1[0] - pix2[0], first iteration
> -        b.le            2f
> +        b.lt            2f
>
>
>  1:
> --
> 2.25.1
>
>
diff mbox series

Patch

diff --git a/libavcodec/aarch64/me_cmp_neon.S b/libavcodec/aarch64/me_cmp_neon.S
index 832a7cb22d..c710358ab7 100644
--- a/libavcodec/aarch64/me_cmp_neon.S
+++ b/libavcodec/aarch64/me_cmp_neon.S
@@ -471,7 +471,7 @@  function sse8_neon, export=1
         movi            v21.4s, #0
         movi            v20.4s, #0
         cmp             w4, #4
-        b.le            2f
+        b.lt            2f
 
 // make 4 iterations at once
 1:
@@ -534,7 +534,7 @@  function sse4_neon, export=1
 
         movi            v16.4s, #0                      // clear the result accumulator
         cmp             w4, #4
-        b.le            2f
+        b.lt            2f
 
 // make 4 iterations at once
 1:
@@ -663,7 +663,7 @@  function vsse16_neon, export=1
         cmp             w4, #3                          // check if we can make 3 iterations at once
         usubl           v31.8h, v0.8b, v1.8b            // Signed difference of pix1[0] - pix2[0], first iteration
         usubl2          v30.8h, v0.16b, v1.16b          // Signed difference of pix1[0] - pix2[0], first iteration
-        b.le            2f
+        b.lt            2f
 
 
 1: