Message ID | 20221114143551.9740-1-bin.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,v1] libavfilter/x86/vf_convolution: fix sobel swap issue on WIN64 | expand |

Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On 11/14/2022 11:35 AM, bin.wang-at-intel.com@ffmpeg.org wrote:
> From: "Wang, Bin" <bin.wang@intel.com>
>
> Signed-off-by: Wang, Bin <bin.wang@intel.com>
> ---
> libavfilter/x86/vf_convolution.asm | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
> index c912d56752..a6be95690b 100644
> --- a/libavfilter/x86/vf_convolution.asm
> +++ b/libavfilter/x86/vf_convolution.asm
> @@ -189,8 +189,8 @@ cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5,
> cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
> %endif
> %if WIN64
> - SWAP xmm0, xmm2
> - SWAP xmm1, xmm3
> + VBROADCASTSS m0, xmm2
> + VBROADCASTSS m1, xmm3

The other two VBROADCASTSS below should be used on UNIX64 only. Otherwise they will overwrite m0 and m1 on WIN64.

> mov r2q, matrixmp
> mov r3q, ptrmp
> DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
> @@ -281,7 +281,7 @@ cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2,
> fmaddss xmm4, xmm5, xmm5, xmm4
>
> sqrtps xmm4, xmm4
> - fmaddss xmm4, xmm4, xmm0, xmm1 ;sum = sum * rdiv + bias
> + fmaddss xmm4, xmm4, xm0, xm1 ;sum = sum * rdiv + bias
> cvttps2dq xmm4, xmm4 ; trunc to integer
> packssdw xmm4, xmm4
> packuswb xmm4, xmm4
diff --git a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
index c912d56752..a6be95690b 100644
--- a/libavfilter/x86/vf_convolution.asm
+++ b/libavfilter/x86/vf_convolution.asm
@@ -189,8 +189,8 @@ cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5,
cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
%endif
%if WIN64
- SWAP xmm0, xmm2
- SWAP xmm1, xmm3
+ VBROADCASTSS m0, xmm2
+ VBROADCASTSS m1, xmm3
mov r2q, matrixmp
mov r3q, ptrmp
DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
@@ -281,7 +281,7 @@ cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2,
fmaddss xmm4, xmm5, xmm5, xmm4

sqrtps xmm4, xmm4
- fmaddss xmm4, xmm4, xmm0, xmm1 ;sum = sum * rdiv + bias
+ fmaddss xmm4, xmm4, xm0, xm1 ;sum = sum * rdiv + bias
cvttps2dq xmm4, xmm4 ; trunc to integer
packssdw xmm4, xmm4
packuswb xmm4, xmm4