diff mbox

[FFmpeg-devel,03/13] arm: vp9itxfm: Simplify the stack alignment code

Message ID 1484000119-4959-3-git-send-email-martin@martin.st
State Accepted
Commit e5b0fc170f85b00f7dd0ac514918fb5c95253d39
Headers show

Commit Message

Martin Storsjö Jan. 9, 2017, 10:15 p.m. UTC
From: Janne Grunau <janne-libav@jannau.net>

This saves one instruction for thumb, and leaves only
1 (arm) or 2 (thumb) mode-specific instructions.

This is cherrypicked from libav commit
e5b0fc170f85b00f7dd0ac514918fb5c95253d39.
---
 libavcodec/arm/vp9itxfm_neon.S | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)
diff mbox

Patch

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index 06470a3..d7a2654 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -791,15 +791,13 @@  function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
 .ifnc \txfm1\()_\txfm2,idct_idct
         vpush           {q4-q7}
 .endif
-        mov             r7,  sp
 
         @ Align the stack, allocate a temp buffer
-T       mov             r12, sp
-T       bic             r12, r12, #15
-T       sub             r12, r12, #512
-T       mov             sp,  r12
-A       bic             sp,  sp,  #15
-A       sub             sp,  sp,  #512
+T       mov             r7,  sp
+T       and             r7,  r7,  #15
+A       and             r7,  sp,  #15
+        add             r7,  r7,  #512
+        sub             sp,  sp,  r7
 
         mov             r4,  r0
         mov             r5,  r1
@@ -828,7 +826,7 @@  A       sub             sp,  sp,  #512
         bl              \txfm2\()16_1d_4x16_pass2_neon
 .endr
 
-        mov             sp,  r7
+        add             sp,  sp,  r7
 .ifnc \txfm1\()_\txfm2,idct_idct
         vpop            {q4-q7}
 .endif
@@ -1117,15 +1115,13 @@  function ff_vp9_idct_idct_32x32_add_neon, export=1
         beq             idct32x32_dc_add_neon
         push            {r4-r7,lr}
         vpush           {q4-q7}
-        mov             r7,  sp
 
         @ Align the stack, allocate a temp buffer
-T       mov             r12, sp
-T       bic             r12, r12, #15
-T       sub             r12, r12, #2048
-T       mov             sp,  r12
-A       bic             sp,  sp,  #15
-A       sub             sp,  sp,  #2048
+T       mov             r7,  sp
+T       and             r7,  r7,  #15
+A       and             r7,  sp,  #15
+        add             r7,  r7,  #2048
+        sub             sp,  sp,  r7
 
         mov             r4,  r0
         mov             r5,  r1
@@ -1143,7 +1139,7 @@  A       sub             sp,  sp,  #2048
         bl              idct32_1d_4x32_pass2_neon
 .endr
 
-        mov             sp,  r7
+        add             sp,  sp,  r7
         vpop            {q4-q7}
         pop             {r4-r7,pc}
 endfunc