diff mbox series

[FFmpeg-devel] arm: vc1dsp: Canonicalize the syntax for aligned NEON loads/stores

Message ID 20220928073145.40268-1-martin@martin.st
State Accepted
Commit 86519234b8df379948fde1493f6a6679632f6d45
Headers show
Series [FFmpeg-devel] arm: vc1dsp: Canonicalize the syntax for aligned NEON loads/stores | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Martin Storsjö Sept. 28, 2022, 7:31 a.m. UTC
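Write the alignment specifier of aligned NEON loads/stores in the
canonical form, with a comma after the base register ("[r0, :64]"
instead of "[r0 :64]"). This should hopefully fix building with older
toolchains, and thereby the FATE failures on
http://fate.ffmpeg.org/history.cgi?slot=armel5tej-qemu-debian-gcc4.4.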

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/arm/vc1dsp_neon.S | 40 ++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

Comments

Martin Storsjö Sept. 29, 2022, 7:27 a.m. UTC | #1
On Wed, 28 Sep 2022, Martin Storsjö wrote:

> This hopefully should fix building with older toolchains, hopefully
> fixing the fate failures on
> http://fate.ffmpeg.org/history.cgi?slot=armel5tej-qemu-debian-gcc4.4.
>
> Signed-off-by: Martin Storsjö <martin@martin.st>
> ---
> libavcodec/arm/vc1dsp_neon.S | 40 ++++++++++++++++++------------------
> 1 file changed, 20 insertions(+), 20 deletions(-)

I'll go ahead and push this, as I don't foresee objections to it.

// Martin
diff mbox series

Patch

diff --git a/libavcodec/arm/vc1dsp_neon.S b/libavcodec/arm/vc1dsp_neon.S
index 96014fbebc..cdfb4a3f6c 100644
--- a/libavcodec/arm/vc1dsp_neon.S
+++ b/libavcodec/arm/vc1dsp_neon.S
@@ -1310,17 +1310,17 @@  endfunc
 function ff_vc1_v_loop_filter8_neon, export=1
         sub             r3, r0, r1, lsl #2
         vldr            d0, .Lcoeffs
-        vld1.32         {d1}, [r0 :64], r1      @ P5
-        vld1.32         {d2}, [r3 :64], r1      @ P1
-        vld1.32         {d3}, [r3 :64], r1      @ P2
-        vld1.32         {d4}, [r0 :64], r1      @ P6
-        vld1.32         {d5}, [r3 :64], r1      @ P3
-        vld1.32         {d6}, [r0 :64], r1      @ P7
+        vld1.32         {d1}, [r0, :64], r1     @ P5
+        vld1.32         {d2}, [r3, :64], r1     @ P1
+        vld1.32         {d3}, [r3, :64], r1     @ P2
+        vld1.32         {d4}, [r0, :64], r1     @ P6
+        vld1.32         {d5}, [r3, :64], r1     @ P3
+        vld1.32         {d6}, [r0, :64], r1     @ P7
         vshll.u8        q8, d1, #1              @ 2*P5
         vshll.u8        q9, d2, #1              @ 2*P1
-        vld1.32         {d7}, [r3 :64]          @ P4
+        vld1.32         {d7}, [r3, :64]         @ P4
         vmovl.u8        q1, d3                  @ P2
-        vld1.32         {d20}, [r0 :64]         @ P8
+        vld1.32         {d20}, [r0, :64]        @ P8
         vmovl.u8        q11, d4                 @ P6
         vdup.16         q12, r2                 @ pq
         vmovl.u8        q13, d5                 @ P3
@@ -1375,8 +1375,8 @@  function ff_vc1_v_loop_filter8_neon, export=1
         vmla.i16        q1, q0, q2              @ invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P5
         vqmovun.s16     d0, q3
         vqmovun.s16     d1, q1
-        vst1.32         {d0}, [r3 :64], r1
-        vst1.32         {d1}, [r3 :64]
+        vst1.32         {d0}, [r3, :64], r1
+        vst1.32         {d1}, [r3, :64]
 1:      bx              lr
 endfunc
 
@@ -1491,17 +1491,17 @@  function ff_vc1_v_loop_filter16_neon, export=1
         vpush           {d8-d15}
         sub             r3, r0, r1, lsl #2
         vldr            d0, .Lcoeffs
-        vld1.64         {q1}, [r0 :128], r1     @ P5
-        vld1.64         {q2}, [r3 :128], r1     @ P1
-        vld1.64         {q3}, [r3 :128], r1     @ P2
-        vld1.64         {q4}, [r0 :128], r1     @ P6
-        vld1.64         {q5}, [r3 :128], r1     @ P3
-        vld1.64         {q6}, [r0 :128], r1     @ P7
+        vld1.64         {q1}, [r0, :128], r1    @ P5
+        vld1.64         {q2}, [r3, :128], r1    @ P1
+        vld1.64         {q3}, [r3, :128], r1    @ P2
+        vld1.64         {q4}, [r0, :128], r1    @ P6
+        vld1.64         {q5}, [r3, :128], r1    @ P3
+        vld1.64         {q6}, [r0, :128], r1    @ P7
         vshll.u8        q7, d2, #1              @ 2*P5[0..7]
         vshll.u8        q8, d4, #1              @ 2*P1[0..7]
-        vld1.64         {q9}, [r3 :128]         @ P4
+        vld1.64         {q9}, [r3, :128]        @ P4
         vmovl.u8        q10, d6                 @ P2[0..7]
-        vld1.64         {q11}, [r0 :128]        @ P8
+        vld1.64         {q11}, [r0, :128]       @ P8
         vmovl.u8        q12, d8                 @ P6[0..7]
         vdup.16         q13, r2                 @ pq
         vshll.u8        q2, d5, #1              @ 2*P1[8..15]
@@ -1611,8 +1611,8 @@  function ff_vc1_v_loop_filter16_neon, export=1
         vqmovun.s16     d0, q6
         vqmovun.s16     d5, q9
         vqmovun.s16     d1, q1
-        vst1.64         {q2}, [r3 :128], r1
-        vst1.64         {q0}, [r3 :128]
+        vst1.64         {q2}, [r3, :128], r1
+        vst1.64         {q0}, [r3, :128]
 1:      vpop            {d8-d15}
         bx              lr
 endfunc