diff mbox

[FFmpeg-devel,2/2] x86/vf_blend: optimize difference and negation functions

Message ID 20170627154553.4800-2-jamrial@gmail.com
State Accepted
Commit 0daa1cf0731830288b8cc875ca1ee641cfe422b2
Headers show

Commit Message

James Almer June 27, 2017, 3:45 p.m. UTC
Process more pixels per loop.

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavfilter/x86/vf_blend.asm | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

Comments

Paul B Mahol June 27, 2017, 3:50 p.m. UTC | #1
On 6/27/17, James Almer <jamrial@gmail.com> wrote:
> Process more pixels per loop.
>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavfilter/x86/vf_blend.asm | 40 ++++++++++++++++++++++++----------------
>  1 file changed, 24 insertions(+), 16 deletions(-)
>

LGTM
James Almer June 27, 2017, 4:53 p.m. UTC | #2
On 6/27/2017 12:50 PM, Paul B Mahol wrote:
> On 6/27/17, James Almer <jamrial@gmail.com> wrote:
>> Process more pixels per loop.
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>  libavfilter/x86/vf_blend.asm | 40 ++++++++++++++++++++++++----------------
>>  1 file changed, 24 insertions(+), 16 deletions(-)
>>
> 
> LGTM

Pushed, thanks.
diff mbox

Patch

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 25f6f5affc..d5e512e6e0 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -268,21 +268,25 @@  BLEND_INIT phoenix, 4
 BLEND_END
 
 %macro BLEND_ABS 0
-BLEND_INIT difference, 3
+BLEND_INIT difference, 5
     pxor       m2, m2
 .nextrow:
     mov        xq, widthq
 
     .loop:
-        movh            m0, [topq + xq]
-        movh            m1, [bottomq + xq]
+        movu            m0, [topq + xq]
+        movu            m1, [bottomq + xq]
+        punpckhbw       m3, m0, m2
         punpcklbw       m0, m2
+        punpckhbw       m4, m1, m2
         punpcklbw       m1, m2
         psubw           m0, m1
+        psubw           m3, m4
         ABS1            m0, m1
-        packuswb        m0, m0
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
+        ABS1            m3, m4
+        packuswb        m0, m3
+        mova   [dstq + xq], m0
+        add             xq, mmsize
     jl .loop
 BLEND_END
 
@@ -311,26 +315,30 @@  BLEND_INIT extremity, 8
     jl .loop
 BLEND_END
 
-BLEND_INIT negation, 5
+BLEND_INIT negation, 8
     pxor       m2, m2
     mova       m4, [pw_255]
 .nextrow:
     mov        xq, widthq
 
     .loop:
-        movh            m0, [topq + xq]
-        movh            m1, [bottomq + xq]
+        movu            m0, [topq + xq]
+        movu            m1, [bottomq + xq]
+        punpckhbw       m5, m0, m2
         punpcklbw       m0, m2
+        punpckhbw       m6, m1, m2
         punpcklbw       m1, m2
-        mova            m3, m4
-        psubw           m3, m0
+        psubw           m3, m4, m0
+        psubw           m7, m4, m5
         psubw           m3, m1
+        psubw           m7, m6
         ABS1            m3, m1
-        mova            m0, m4
-        psubw           m0, m3
-        packuswb        m0, m0
-        movh   [dstq + xq], m0
-        add             xq, mmsize / 2
+        ABS1            m7, m1
+        psubw           m0, m4, m3
+        psubw           m1, m4, m7
+        packuswb        m0, m1
+        mova   [dstq + xq], m0
+        add             xq, mmsize
     jl .loop
 BLEND_END
 %endmacro