diff mbox

[FFmpeg-devel,22/34] arm: vp9lpf: Interleave the start of flat8in into the calculation above

Message ID 1488967274-8143-22-git-send-email-martin@martin.st
State Accepted
Commit e18c39005ad1dbb178b336f691da1de91afd434e
Headers show

Commit Message

Martin Storsjö March 8, 2017, 10:01 a.m. UTC
This adds lots of extra .if directives, but speeds it up by a couple of
cycles by avoiding stalls.

This is cherrypicked from libav commit
e18c39005ad1dbb178b336f691da1de91afd434e.
---
 libavcodec/arm/vp9lpf_neon.S | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S
index 3d289e5..b90c536 100644
--- a/libavcodec/arm/vp9lpf_neon.S
+++ b/libavcodec/arm/vp9lpf_neon.S
@@ -182,16 +182,20 @@ 
 
         vmovl.u8        q0,  d22                @ p1
         vmovl.u8        q1,  d25                @ q1
+.if \wd >= 8
+        vmov            r2,  r3,  d6
+.endif
         vaddw.s8        q0,  q0,  \tmp3         @ p1 + f
         vsubw.s8        q1,  q1,  \tmp3         @ q1 - f
+.if \wd >= 8
+        orrs            r2,  r2,  r3
+.endif
         vqmovun.s16     d0,  q0                 @ out p1
         vqmovun.s16     d2,  q1                 @ out q1
         vbit            d22, d0,  d5            @ if (!hev && fm && !flat8in)
         vbit            d25, d2,  d5
 
 .if \wd >= 8
-        vmov            r2,  r3,  d6
-        orrs            r2,  r2,  r3
         @ If no pixels need flat8in, jump to flat8out
         @ (or to a writeout of the inner 4 pixels, for wd=8)
         beq             6f