diff mbox series

[FFmpeg-devel] arm: hevc_qpel: Fix the assembly to work with non-multiple of 8 widths

Message ID 20210825083555.1634717-1-martin@martin.st
State Accepted
Commit 2589060b92eeeb944c6e2b50e38412c0c5fabcf4
Headers show
Series [FFmpeg-devel] arm: hevc_qpel: Fix the assembly to work with non-multiple of 8 widths | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Martin Storsjö Aug. 25, 2021, 8:35 a.m. UTC
This unbreaks the fate-checkasm-hevc_pel test on arm targets.

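The assembly assumed that the width passed to the DSP functions is
a multiple of 8, while the checkasm test used other widths too.
After each "subs r5, #8", the code only branched to 99f on "beq",
i.e. when the result was exactly zero; it now branches on "ble",
i.e. when the result is zero or negative.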

This wasn't noticed before, because the hevc_pel checkasm tests
(which were added in 9c513edb7999a35ddcc6e3a8d984a96c8fb492a3 in
January) weren't run as part of fate until
b492cacffd36ad4cb251ba1f13ac398318ee639a in August.

As this hasn't been an issue in practice with actual full decoding
tests, it seems like the actual decoder doesn't call these functions
with such widths. Therefore, we could alternatively fix the test
to only test things that the real decoder does, and this modification
could be reverted.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/arm/hevcdsp_qpel_neon.S | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

Comments

Martin Storsjö Aug. 25, 2021, 8:26 p.m. UTC | #1
On Wed, 25 Aug 2021, Martin Storsjö wrote:

> This unbreaks the fate-checkasm-hevc_pel test on arm targets.
>
> The assembly assumed that the width passed to the DSP functions is
> a multiple of 8, while the checkasm test used other widths too.
>
> This wasn't noticed before, because the hevc_pel checkasm tests
> (which were added in 9c513edb7999a35ddcc6e3a8d984a96c8fb492a3 in
> January) weren't run as part of fate until
> b492cacffd36ad4cb251ba1f13ac398318ee639a in August.
>
> As this hasn't been an issue in practice with actual full decoding
> tests, it seems like the actual decoder doesn't call these functions
> with such widths. Therefore, we could alternatively fix the test
> to only test things that the real decoder does, and this modification
> could be reverted.
>
> Signed-off-by: Martin Storsjö <martin@martin.st>
> ---
> libavcodec/arm/hevcdsp_qpel_neon.S | 18 +++++++++---------
> 1 file changed, 9 insertions(+), 9 deletions(-)

Pushed this one to unbreak fate now, although I guess it'd be nice to 
align the checkasm test with what the decoder actually does.

// Martin
diff mbox series

Patch

diff --git a/libavcodec/arm/hevcdsp_qpel_neon.S b/libavcodec/arm/hevcdsp_qpel_neon.S
index caa6efa766..f71bec05ed 100644
--- a/libavcodec/arm/hevcdsp_qpel_neon.S
+++ b/libavcodec/arm/hevcdsp_qpel_neon.S
@@ -237,7 +237,7 @@ 
         vld1.8    {d23}, [r2], r3
         bne 8b
         subs  r5, #8
-        beq       99f
+        ble       99f
         mov r4, r12
         add r6, #16
         mov r0, r6
@@ -280,7 +280,7 @@ 
         vld1.8    {d23}, [r2], r3
         bne 8b
         subs  r5, #8
-        beq       99f
+        ble       99f
         mov r4, r12
         add r6, #8
         mov r0, r6
@@ -310,7 +310,7 @@ 
         vld1.8    {d23}, [r2], r3
         bne 8b
         subs  r5, #8
-        beq       99f
+        ble       99f
         mov r4, r12
         add r6, #8
         mov r0, r6
@@ -377,7 +377,7 @@  endfunc
         vst1.16   {q7}, [r0], r1
         bne       8b
         subs      r5, #8
-        beq      99f
+        ble       99f
         mov       r4, r12
         add       r6, #16
         mov       r0, r6
@@ -417,7 +417,7 @@  endfunc
         vst1.8    d0, [r0], r1
         bne       8b
         subs      r5, #8
-        beq      99f
+        ble       99f
         mov       r4, r12
         add       r6, #8
         mov       r0, r6
@@ -446,7 +446,7 @@  endfunc
         vst1.8         d0, [r0], r1
         bne       8b
         subs      r5, #8
-        beq      99f
+        ble       99f
         mov       r4, r12
         add       r6, #8
         add       r10, #16
@@ -533,7 +533,7 @@  endfunc
         \filterh q7
         bne 8b
         subs  r5, #8
-        beq 99f
+        ble 99f
         mov r4, r12
         add r6, #16
         mov r0, r6
@@ -594,7 +594,7 @@  endfunc
         \filterh q7
         bne 8b
         subs  r5, #8
-        beq 99f
+        ble 99f
         mov r4, r12
         add r6, #8
         mov r0, r6
@@ -641,7 +641,7 @@  endfunc
         \filterh q7
         bne 8b
         subs  r5, #8
-        beq 99f
+        ble 99f
         mov r4, r12
         add r6, #8
         mov r0, r6