diff mbox series

[FFmpeg-devel,1/3] x86/vf_gblur: fix postscale_slice prologue

Message ID 20210217164106.6370-1-jamrial@gmail.com
State Accepted
Commit 2b4da1cb8c2984b37e5c912e103a1b8b734e7c1f
Headers show
Series [FFmpeg-devel,1/3] x86/vf_gblur: fix postscale_slice prologue | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make warning Make failed

Commit Message

James Almer Feb. 17, 2021, 4:41 p.m. UTC
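The x86_32 ABI does not pass float arguments in xmm registers (they arrive on
the stack), and the Win64 ABI passes only the first four arguments in
registers, so the remaining float argument must be loaded from the stack there
as well.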

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavfilter/vf_gblur.c       |  3 +--
 libavfilter/x86/vf_gblur.asm | 29 +++++++++++++----------------
 2 files changed, 14 insertions(+), 18 deletions(-)

Comments

Paul B Mahol Feb. 17, 2021, 4:46 p.m. UTC | #1
lgtm
Michael Niedermayer Feb. 17, 2021, 6:34 p.m. UTC | #2
On Wed, Feb 17, 2021 at 01:41:04PM -0300, James Almer wrote:
> x86_32 ABI does not pass float arguments directly on xmm regs, and the Win64
> ABI uses only the first four regs for this purpose.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavfilter/vf_gblur.c       |  3 +--
>  libavfilter/x86/vf_gblur.asm | 29 +++++++++++++----------------
>  2 files changed, 14 insertions(+), 18 deletions(-)
> 
> diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
> index 109a7a95f9..40956e122d 100644
> --- a/libavfilter/vf_gblur.c
> +++ b/libavfilter/vf_gblur.c
> @@ -234,8 +234,7 @@ void ff_gblur_init(GBlurContext *s)
>  {
>      s->horiz_slice = horiz_slice_c;
>      s->postscale_slice = postscale_c;
> -    if (ARCH_X86_64)
> -        ff_gblur_init_x86(s);
> +    ff_gblur_init_x86(s);
>  }

This fails to link on ARM:

LD	ffmpeg_g
libavfilter/libavfilter.a(vf_gblur.o): In function `ff_gblur_init':
arm/src/libavfilter/vf_gblur.c:237: undefined reference to `ff_gblur_init_x86'
arm/src/libavfilter/vf_gblur.c:237: undefined reference to `ff_gblur_init_x86'
collect2: error: ld returned 1 exit status
Makefile:124: recipe for target 'ffmpeg_g' failed
make: *** [ffmpeg_g] Error 1


[...]
James Almer Feb. 17, 2021, 6:46 p.m. UTC | #3
On 2/17/2021 3:34 PM, Michael Niedermayer wrote:
> On Wed, Feb 17, 2021 at 01:41:04PM -0300, James Almer wrote:
>> x86_32 ABI does not pass float arguments directly on xmm regs, and the Win64
>> ABI uses only the first four regs for this purpose.
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>   libavfilter/vf_gblur.c       |  3 +--
>>   libavfilter/x86/vf_gblur.asm | 29 +++++++++++++----------------
>>   2 files changed, 14 insertions(+), 18 deletions(-)
>>
>> diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
>> index 109a7a95f9..40956e122d 100644
>> --- a/libavfilter/vf_gblur.c
>> +++ b/libavfilter/vf_gblur.c
>> @@ -234,8 +234,7 @@ void ff_gblur_init(GBlurContext *s)
>>   {
>>       s->horiz_slice = horiz_slice_c;
>>       s->postscale_slice = postscale_c;
>> -    if (ARCH_X86_64)
>> -        ff_gblur_init_x86(s);
>> +    ff_gblur_init_x86(s);
>>   }
> 
> fails on arm
> 
> LD	ffmpeg_g
> libavfilter/libavfilter.a(vf_gblur.o): In function `ff_gblur_init':
> arm/src/libavfilter/vf_gblur.c:237: undefined reference to `ff_gblur_init_x86'
> arm/src/libavfilter/vf_gblur.c:237: undefined reference to `ff_gblur_init_x86'
> collect2: error: ld returned 1 exit status
> Makefile:124: recipe for target 'ffmpeg_g' failed
> make: *** [ffmpeg_g] Error 1

Fixed, sorry about that.
diff mbox series

Patch

diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index 109a7a95f9..40956e122d 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -234,8 +234,7 @@  void ff_gblur_init(GBlurContext *s)
 {
     s->horiz_slice = horiz_slice_c;
     s->postscale_slice = postscale_c;
-    if (ARCH_X86_64)
-        ff_gblur_init_x86(s);
+    ff_gblur_init_x86(s);
 }
 
 static int config_input(AVFilterLink *inlink)
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index c29ecba889..c2b2998202 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -185,27 +185,24 @@  HORIZ_SLICE
 %endif
 
 %macro POSTSCALE_SLICE 0
-%if UNIX64
-cglobal postscale_slice, 2, 2, 4, ptr, length
-%else
-cglobal postscale_slice, 5, 5, 4, ptr, length, postscale, min, max
-%endif
+cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
     shl lengthd, 2
     add ptrq, lengthq
     neg lengthq
-%if WIN64
+%if ARCH_X86_32
+    VBROADCASTSS m0, postscalem
+    VBROADCASTSS m1, minm
+    VBROADCASTSS m2, maxm
+%elif WIN64
     SWAP 0, 2
     SWAP 1, 3
-    SWAP 2, 4
-%endif
-%if cpuflag(avx2)
-    vbroadcastss  m0, xm0
-    vbroadcastss  m1, xm1
-    vbroadcastss  m2, xm2
-%else
-    shufps   xm0, xm0, 0
-    shufps   xm1, xm1, 0
-    shufps   xm2, xm2, 0
+    VBROADCASTSS m0, xm0
+    VBROADCASTSS m1, xm1
+    VBROADCASTSS m2, maxm
+%else ; UNIX64
+    VBROADCASTSS m0, xm0
+    VBROADCASTSS m1, xm1
+    VBROADCASTSS m2, xm3
 %endif
 
     .loop: