Message ID | 20221114152023.11003-1-bin.wang@intel.com |
---|---|
State | Accepted |
Commit | 459527108ad409c026da93e9f49f4d3006c0f2f9 |
Headers | show |
Series | [FFmpeg-devel,v2] libavfilter/x86/vf_convolution: fix sobel swap issue on WIN64 | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On 11/14/2022 12:20 PM, bin.wang-at-intel.com@ffmpeg.org wrote:
> From: "Wang, Bin" <bin.wang@intel.com>
>
> Signed-off-by: Wang, Bin <bin.wang@intel.com>
> ---
>  libavfilter/x86/vf_convolution.asm | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
> index c912d56752..9ac9ef5d73 100644
> --- a/libavfilter/x86/vf_convolution.asm
> +++ b/libavfilter/x86/vf_convolution.asm
> @@ -189,15 +189,16 @@ cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5,
>  cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
>  %endif
>  %if WIN64
> -    SWAP xmm0, xmm2
> -    SWAP xmm1, xmm3
> +    VBROADCASTSS m0, xmm2
> +    VBROADCASTSS m1, xmm3
>      mov r2q, matrixmp
>      mov r3q, ptrmp
>      DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
> -%endif
> -    movsxdifnidn widthq, widthd
> +%else
>      VBROADCASTSS m0, xmm0
>      VBROADCASTSS m1, xmm1
> +%endif
> +    movsxdifnidn widthq, widthd
>      pxor m6, m6
>      mov c0q, [ptrq + 0*gprsize]
>      mov c1q, [ptrq + 1*gprsize]
> @@ -281,7 +282,7 @@ cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2,
>      fmaddss xmm4, xmm5, xmm5, xmm4
>
>      sqrtps xmm4, xmm4
> -    fmaddss xmm4, xmm4, xmm0, xmm1 ;sum = sum * rdiv + bias
> +    fmaddss xmm4, xmm4, xm0, xm1 ;sum = sum * rdiv + bias
>      cvttps2dq xmm4, xmm4 ; trunc to integer
>      packssdw xmm4, xmm4
>      packuswb xmm4, xmm4

Should be ok.
On Mon, 2022-11-14 at 13:34 -0300, James Almer wrote:
> On 11/14/2022 12:20 PM, bin.wang-at-intel.com@ffmpeg.org wrote:
> > From: "Wang, Bin" <bin.wang@intel.com>
> >
> > Signed-off-by: Wang, Bin <bin.wang@intel.com>
> > ---
> >  libavfilter/x86/vf_convolution.asm | 11 ++++++-----
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> >
> > diff --git a/libavfilter/x86/vf_convolution.asm
> > b/libavfilter/x86/vf_convolution.asm
> > index c912d56752..9ac9ef5d73 100644
> > --- a/libavfilter/x86/vf_convolution.asm
> > +++ b/libavfilter/x86/vf_convolution.asm
> > @@ -189,15 +189,16 @@ cglobal filter_sobel, 4, 15, 7, dst, width, matrix,
> > ptr, c0, c1, c2, c3, c4, c5,
> >  cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0,
> > c1, c2, c3, c4, c5, c6, c7, c8, r, x
> >  %endif
> >  %if WIN64
> > -    SWAP xmm0, xmm2
> > -    SWAP xmm1, xmm3
> > +    VBROADCASTSS m0, xmm2
> > +    VBROADCASTSS m1, xmm3
> >      mov r2q, matrixmp
> >      mov r3q, ptrmp
> >      DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7,
> > c8, r, x
> > -%endif
> > -    movsxdifnidn widthq, widthd
> > +%else
> >      VBROADCASTSS m0, xmm0
> >      VBROADCASTSS m1, xmm1
> > +%endif
> > +    movsxdifnidn widthq, widthd
> >      pxor m6, m6
> >      mov c0q, [ptrq + 0*gprsize]
> >      mov c1q, [ptrq + 1*gprsize]
> > @@ -281,7 +282,7 @@ cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias,
> > matrix, ptr, c0, c1, c2,
> >      fmaddss xmm4, xmm5, xmm5, xmm4
> >
> >      sqrtps xmm4, xmm4
> > -    fmaddss xmm4, xmm4, xmm0, xmm1 ;sum = sum * rdiv + bias
> > +    fmaddss xmm4, xmm4, xm0, xm1 ;sum = sum * rdiv + bias
> >      cvttps2dq xmm4, xmm4 ; trunc to integer
> >      packssdw xmm4, xmm4
> >      packuswb xmm4, xmm4
>
> Should be ok.

Applied,

-Haihao
```diff
diff --git a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
index c912d56752..9ac9ef5d73 100644
--- a/libavfilter/x86/vf_convolution.asm
+++ b/libavfilter/x86/vf_convolution.asm
@@ -189,15 +189,16 @@ cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5,
 cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
 %endif
 %if WIN64
-    SWAP xmm0, xmm2
-    SWAP xmm1, xmm3
+    VBROADCASTSS m0, xmm2
+    VBROADCASTSS m1, xmm3
     mov r2q, matrixmp
     mov r3q, ptrmp
     DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x
-%endif
-    movsxdifnidn widthq, widthd
+%else
     VBROADCASTSS m0, xmm0
     VBROADCASTSS m1, xmm1
+%endif
+    movsxdifnidn widthq, widthd
     pxor m6, m6
     mov c0q, [ptrq + 0*gprsize]
     mov c1q, [ptrq + 1*gprsize]
@@ -281,7 +282,7 @@ cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2,
     fmaddss xmm4, xmm5, xmm5, xmm4
 
     sqrtps xmm4, xmm4
-    fmaddss xmm4, xmm4, xmm0, xmm1 ;sum = sum * rdiv + bias
+    fmaddss xmm4, xmm4, xm0, xm1 ;sum = sum * rdiv + bias
     cvttps2dq xmm4, xmm4 ; trunc to integer
     packssdw xmm4, xmm4
     packuswb xmm4, xmm4
```