diff mbox

[FFmpeg-devel,2/2] x86/vf_v360: use a faster horizontal add in remap4_8bit_line_avx2

Message ID 20190906153003.1093-2-jamrial@gmail.com
State Accepted
Commit 4857688732e27e33fac58e2bd30989f20da2f181
Headers show

Commit Message

James Almer Sept. 6, 2019, 3:30 p.m. UTC
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavfilter/x86/vf_v360.asm | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

Comments

Paul B Mahol Sept. 6, 2019, 3:40 p.m. UTC | #1
LGTM

On 9/6/19, James Almer <jamrial@gmail.com> wrote:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavfilter/x86/vf_v360.asm | 11 ++++-------
>  1 file changed, 4 insertions(+), 7 deletions(-)
>
> diff --git a/libavfilter/x86/vf_v360.asm b/libavfilter/x86/vf_v360.asm
> index f49702b603..a0936eb6dc 100644
> --- a/libavfilter/x86/vf_v360.asm
> +++ b/libavfilter/x86/vf_v360.asm
> @@ -130,14 +130,11 @@ cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x,
>          pmulld          m4, m5
>
>          paddd           m2, m4
> -        vextracti128   xm1, m2, 1
> -        paddd           m1, m2
> -        phaddd          m1, m1
> -        phaddd          m1, m1
> -        psrld           m1, m1, 0xe
> -        packuswb        m1, m1
> +        HADDD           m2, m1
> +        psrld           m2, m2, 0xe
> +        packuswb        m2, m2
>
> -        pextrb   [dstq+xq], xm1, 0
> +        pextrb   [dstq+xq], xm2, 0
>
>          add   xq, 1
>          add   yq, 32
> --
> 2.22.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
James Almer Sept. 6, 2019, 3:49 p.m. UTC | #2
On 9/6/2019 12:40 PM, Paul B Mahol wrote:
> LGTM
> 
> On 9/6/19, James Almer <jamrial@gmail.com> wrote:
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>  libavfilter/x86/vf_v360.asm | 11 ++++-------
>>  1 file changed, 4 insertions(+), 7 deletions(-)
>>
>> diff --git a/libavfilter/x86/vf_v360.asm b/libavfilter/x86/vf_v360.asm
>> index f49702b603..a0936eb6dc 100644
>> --- a/libavfilter/x86/vf_v360.asm
>> +++ b/libavfilter/x86/vf_v360.asm
>> @@ -130,14 +130,11 @@ cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x,
>>          pmulld          m4, m5
>>
>>          paddd           m2, m4
>> -        vextracti128   xm1, m2, 1
>> -        paddd           m1, m2
>> -        phaddd          m1, m1
>> -        phaddd          m1, m1
>> -        psrld           m1, m1, 0xe
>> -        packuswb        m1, m1
>> +        HADDD           m2, m1
>> +        psrld           m2, m2, 0xe
>> +        packuswb        m2, m2
>>
>> -        pextrb   [dstq+xq], xm1, 0
>> +        pextrb   [dstq+xq], xm2, 0
>>
>>          add   xq, 1
>>          add   yq, 32

Pushed, thanks.
diff mbox

Patch

diff --git a/libavfilter/x86/vf_v360.asm b/libavfilter/x86/vf_v360.asm
index f49702b603..a0936eb6dc 100644
--- a/libavfilter/x86/vf_v360.asm
+++ b/libavfilter/x86/vf_v360.asm
@@ -130,14 +130,11 @@  cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x,
         pmulld          m4, m5
 
         paddd           m2, m4
-        vextracti128   xm1, m2, 1
-        paddd           m1, m2
-        phaddd          m1, m1
-        phaddd          m1, m1
-        psrld           m1, m1, 0xe
-        packuswb        m1, m1
+        HADDD           m2, m1
+        psrld           m2, m2, 0xe
+        packuswb        m2, m2
 
-        pextrb   [dstq+xq], xm1, 0
+        pextrb   [dstq+xq], xm2, 0
 
         add   xq, 1
         add   yq, 32