diff mbox series

[FFmpeg-devel,v2] libavfilter/x86/vf_convolution: fix sobel swap issue on WIN64

Message ID 20221114152023.11003-1-bin.wang@intel.com
State Accepted
Commit 459527108ad409c026da93e9f49f4d3006c0f2f9
Headers show
Series [FFmpeg-devel,v2] libavfilter/x86/vf_convolution: fix sobel swap issue on WIN64 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Wang, Bin Nov. 14, 2022, 3:20 p.m. UTC
From: "Wang, Bin" <bin.wang@intel.com>

Signed-off-by: Wang, Bin <bin.wang@intel.com>
---
 libavfilter/x86/vf_convolution.asm | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

Comments

James Almer Nov. 14, 2022, 4:34 p.m. UTC | #1
On 11/14/2022 12:20 PM, bin.wang-at-intel.com@ffmpeg.org wrote:
> From: "Wang, Bin" <bin.wang@intel.com>
> 
> Signed-off-by: Wang, Bin <bin.wang@intel.com>
> ---
>   libavfilter/x86/vf_convolution.asm | 11 ++++++-----
>   1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
> index c912d56752..9ac9ef5d73 100644
> --- a/libavfilter/x86/vf_convolution.asm
> +++ b/libavfilter/x86/vf_convolution.asm
> @@ -189,15 +189,16 @@ cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5,
>   cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
>   %endif
>   %if WIN64
> -    SWAP xmm0, xmm2
> -    SWAP xmm1, xmm3
> +    VBROADCASTSS m0, xmm2
> +    VBROADCASTSS m1, xmm3
>       mov  r2q, matrixmp
>       mov  r3q, ptrmp
>       DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
> -%endif
> -    movsxdifnidn widthq, widthd
> +%else
>       VBROADCASTSS m0, xmm0
>       VBROADCASTSS m1, xmm1
> +%endif
> +    movsxdifnidn widthq, widthd
>       pxor  m6, m6
>       mov   c0q, [ptrq + 0*gprsize]
>       mov   c1q, [ptrq + 1*gprsize]
> @@ -281,7 +282,7 @@ cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2,
>       fmaddss xmm4, xmm5, xmm5, xmm4
>   
>       sqrtps    xmm4, xmm4
> -    fmaddss   xmm4, xmm4, xmm0, xmm1     ;sum = sum * rdiv + bias
> +    fmaddss   xmm4, xmm4, xm0, xm1     ;sum = sum * rdiv + bias
>       cvttps2dq xmm4, xmm4     ; trunc to integer
>       packssdw  xmm4, xmm4
>       packuswb  xmm4, xmm4

Should be ok.
Xiang, Haihao Nov. 21, 2022, 4:37 a.m. UTC | #2
On Mon, 2022-11-14 at 13:34 -0300, James Almer wrote:
> On 11/14/2022 12:20 PM, bin.wang-at-intel.com@ffmpeg.org wrote:
> > From: "Wang, Bin" <bin.wang@intel.com>
> > 
> > Signed-off-by: Wang, Bin <bin.wang@intel.com>
> > ---
> >   libavfilter/x86/vf_convolution.asm | 11 ++++++-----
> >   1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/libavfilter/x86/vf_convolution.asm
> > b/libavfilter/x86/vf_convolution.asm
> > index c912d56752..9ac9ef5d73 100644
> > --- a/libavfilter/x86/vf_convolution.asm
> > +++ b/libavfilter/x86/vf_convolution.asm
> > @@ -189,15 +189,16 @@ cglobal filter_sobel, 4, 15, 7, dst, width, matrix,
> > ptr, c0, c1, c2, c3, c4, c5,
> >   cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0,
> > c1, c2, c3, c4, c5, c6, c7, c8, r, x
> >   %endif
> >   %if WIN64
> > -    SWAP xmm0, xmm2
> > -    SWAP xmm1, xmm3
> > +    VBROADCASTSS m0, xmm2
> > +    VBROADCASTSS m1, xmm3
> >       mov  r2q, matrixmp
> >       mov  r3q, ptrmp
> >       DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7,
> > c8, r, x
> > -%endif
> > -    movsxdifnidn widthq, widthd
> > +%else
> >       VBROADCASTSS m0, xmm0
> >       VBROADCASTSS m1, xmm1
> > +%endif
> > +    movsxdifnidn widthq, widthd
> >       pxor  m6, m6
> >       mov   c0q, [ptrq + 0*gprsize]
> >       mov   c1q, [ptrq + 1*gprsize]
> > @@ -281,7 +282,7 @@ cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias,
> > matrix, ptr, c0, c1, c2,
> >       fmaddss xmm4, xmm5, xmm5, xmm4
> >   
> >       sqrtps    xmm4, xmm4
> > -    fmaddss   xmm4, xmm4, xmm0, xmm1     ;sum = sum * rdiv + bias
> > +    fmaddss   xmm4, xmm4, xm0, xm1     ;sum = sum * rdiv + bias
> >       cvttps2dq xmm4, xmm4     ; trunc to integer
> >       packssdw  xmm4, xmm4
> >       packuswb  xmm4, xmm4
> 
> Should be ok.

Applied.

-Haihao
diff mbox series

Patch

diff --git a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
index c912d56752..9ac9ef5d73 100644
--- a/libavfilter/x86/vf_convolution.asm
+++ b/libavfilter/x86/vf_convolution.asm
@@ -189,15 +189,16 @@  cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5,
 cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
 %endif
 %if WIN64
-    SWAP xmm0, xmm2
-    SWAP xmm1, xmm3
+    VBROADCASTSS m0, xmm2
+    VBROADCASTSS m1, xmm3
     mov  r2q, matrixmp
     mov  r3q, ptrmp
     DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
-%endif
-    movsxdifnidn widthq, widthd
+%else
     VBROADCASTSS m0, xmm0
     VBROADCASTSS m1, xmm1
+%endif
+    movsxdifnidn widthq, widthd
     pxor  m6, m6
     mov   c0q, [ptrq + 0*gprsize]
     mov   c1q, [ptrq + 1*gprsize]
@@ -281,7 +282,7 @@  cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2,
     fmaddss xmm4, xmm5, xmm5, xmm4
 
     sqrtps    xmm4, xmm4
-    fmaddss   xmm4, xmm4, xmm0, xmm1     ;sum = sum * rdiv + bias
+    fmaddss   xmm4, xmm4, xm0, xm1     ;sum = sum * rdiv + bias
     cvttps2dq xmm4, xmm4     ; trunc to integer
     packssdw  xmm4, xmm4
     packuswb  xmm4, xmm4