diff mbox

[FFmpeg-devel] x86/vf_transpose: make ff_transpose_8x8_16_sse2 work on x86_32

Message ID 20191021200618.10078-1-jamrial@gmail.com
State Accepted
Commit 738bc3e7420751a7686040e7248efad13fcbc86b
Headers show

Commit Message

James Almer Oct. 21, 2019, 8:06 p.m. UTC
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavfilter/x86/vf_transpose.asm    | 11 +++++------
 libavfilter/x86/vf_transpose_init.c |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

Comments

Paul B Mahol Oct. 22, 2019, 4:25 p.m. UTC | #1
probably ok

On 10/21/19, James Almer <jamrial@gmail.com> wrote:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavfilter/x86/vf_transpose.asm    | 11 +++++------
>  libavfilter/x86/vf_transpose_init.c |  2 +-
>  2 files changed, 6 insertions(+), 7 deletions(-)
>
> diff --git a/libavfilter/x86/vf_transpose.asm b/libavfilter/x86/vf_transpose.asm
> index f9f585369a..c532c899ee 100644
> --- a/libavfilter/x86/vf_transpose.asm
> +++ b/libavfilter/x86/vf_transpose.asm
> @@ -56,10 +56,7 @@ cglobal transpose_8x8_8, 4,5,8, src, src_linesize, dst, dst_linesize, linesize3
>      movq [dstq + linesize3q], m7
>      RET
>
> -%if ARCH_X86_64
> -
> -INIT_XMM sse2
> -cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
> +cglobal transpose_8x8_16, 4,5,9, ARCH_X86_32 * 32, src, src_linesize, dst, dst_linesize, linesize3
>      lea     linesize3q, [src_linesizeq * 3]
>      movu    m0, [srcq + src_linesizeq * 0]
>      movu    m1, [srcq + src_linesizeq * 1]
> @@ -71,7 +68,11 @@ cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
>      movu    m6, [srcq + src_linesizeq * 2]
>      movu    m7, [srcq + linesize3q]
>
> +%if ARCH_X86_64
>      TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
> +%else
> +    TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp + 16]
> +%endif
>
>      lea                  linesize3q, [dst_linesizeq * 3]
>      movu [dstq + dst_linesizeq * 0], m0
> @@ -84,5 +85,3 @@ cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
>      movu [dstq + dst_linesizeq * 2], m6
>      movu [dstq + linesize3q], m7
>      RET
> -
> -%endif
> diff --git a/libavfilter/x86/vf_transpose_init.c b/libavfilter/x86/vf_transpose_init.c
> index f1a9cd058b..6bb9908725 100644
> --- a/libavfilter/x86/vf_transpose_init.c
> +++ b/libavfilter/x86/vf_transpose_init.c
> @@ -43,7 +43,7 @@ av_cold void ff_transpose_init_x86(TransVtable *v, int pixstep)
>          v->transpose_8x8 = ff_transpose_8x8_8_sse2;
>      }
>
> -    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && pixstep == 2) {
> +    if (EXTERNAL_SSE2(cpu_flags) && pixstep == 2) {
>          v->transpose_8x8 = ff_transpose_8x8_16_sse2;
>      }
>  }
> --
> 2.23.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
James Almer Oct. 22, 2019, 4:58 p.m. UTC | #2
On 10/22/2019 1:25 PM, Paul B Mahol wrote:
> probably ok

Applied, thanks.
diff mbox

Patch

diff --git a/libavfilter/x86/vf_transpose.asm b/libavfilter/x86/vf_transpose.asm
index f9f585369a..c532c899ee 100644
--- a/libavfilter/x86/vf_transpose.asm
+++ b/libavfilter/x86/vf_transpose.asm
@@ -56,10 +56,7 @@  cglobal transpose_8x8_8, 4,5,8, src, src_linesize, dst, dst_linesize, linesize3
     movq [dstq + linesize3q], m7
     RET
 
-%if ARCH_X86_64
-
-INIT_XMM sse2
-cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
+cglobal transpose_8x8_16, 4,5,9, ARCH_X86_32 * 32, src, src_linesize, dst, dst_linesize, linesize3
     lea     linesize3q, [src_linesizeq * 3]
     movu    m0, [srcq + src_linesizeq * 0]
     movu    m1, [srcq + src_linesizeq * 1]
@@ -71,7 +68,11 @@  cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
     movu    m6, [srcq + src_linesizeq * 2]
     movu    m7, [srcq + linesize3q]
 
+%if ARCH_X86_64
     TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
+%else
+    TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp + 16]
+%endif
 
     lea                  linesize3q, [dst_linesizeq * 3]
     movu [dstq + dst_linesizeq * 0], m0
@@ -84,5 +85,3 @@  cglobal transpose_8x8_16, 4,5,9, src, src_linesize, dst, dst_linesize, linesize3
     movu [dstq + dst_linesizeq * 2], m6
     movu [dstq + linesize3q], m7
     RET
-
-%endif
diff --git a/libavfilter/x86/vf_transpose_init.c b/libavfilter/x86/vf_transpose_init.c
index f1a9cd058b..6bb9908725 100644
--- a/libavfilter/x86/vf_transpose_init.c
+++ b/libavfilter/x86/vf_transpose_init.c
@@ -43,7 +43,7 @@  av_cold void ff_transpose_init_x86(TransVtable *v, int pixstep)
         v->transpose_8x8 = ff_transpose_8x8_8_sse2;
     }
 
-    if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && pixstep == 2) {
+    if (EXTERNAL_SSE2(cpu_flags) && pixstep == 2) {
         v->transpose_8x8 = ff_transpose_8x8_16_sse2;
     }
 }