diff mbox

[FFmpeg-devel,3/4] x86util: import MOVHL macro

Message ID 20170216131149.7028-3-jdarnley@obe.tv
State Accepted
Headers show

Commit Message

James Darnley Feb. 16, 2017, 1:11 p.m. UTC
Originally committed to x264 in 1637239a by Henrik Gramner who has
agreed to re-license it as LGPL.  Original commit message follows.

    x86: Avoid some bypass delays and false dependencies

    A bypass delay of 1-3 clock cycles may occur on some CPUs when transitioning
    between int and float domains, so try to avoid that if possible.
---
 libavutil/x86/x86util.asm | 12 ++++++++++++
 1 file changed, 12 insertions(+)

Comments

Paul B Mahol Feb. 16, 2017, 1:27 p.m. UTC | #1
On 2/16/17, James Darnley <jdarnley@obe.tv> wrote:
> Originally committed to x264 in 1637239a by Henrik Gramner who has
> agreed to re-license it as LGPL.  Original commit message follows.
>
>     x86: Avoid some bypass delays and false dependencies
>
>     A bypass delay of 1-3 clock cycles may occur on some CPUs when transitioning
>     between int and float domains, so try to avoid that if possible.
> ---
>  libavutil/x86/x86util.asm | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
>
> diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
> index c063436..1408f0a 100644
> --- a/libavutil/x86/x86util.asm
> +++ b/libavutil/x86/x86util.asm
> @@ -876,3 +876,15 @@
>      psrlq   %1, 8*(%2)
>  %endif
>  %endmacro
> +
> +%macro MOVHL 2 ; dst, src
> +%ifidn %1, %2
> +    punpckhqdq %1, %2
> +%elif cpuflag(avx)
> +    punpckhqdq %1, %2, %2
> +%elif cpuflag(sse4)
> +    pshufd     %1, %2, q3232 ; pshufd is slow on some older CPUs, so only use it on more modern ones
> +%else
> +    movhlps    %1, %2        ; may cause an int/float domain transition and has a dependency on dst
> +%endif
> +%endmacro

lgtm
diff mbox

Patch

diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index c063436..1408f0a 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -876,3 +876,15 @@ 
     psrlq   %1, 8*(%2)
 %endif
 %endmacro
+
+%macro MOVHL 2 ; dst, src -- low 64 bits of dst = high 64 bits of src; the upper half of dst is only left as-is on the movhlps path, so rely on the low half only
+%ifidn %1, %2
+    punpckhqdq %1, %2        ; dst and src are the same register: the 2-operand form works in place
+%elif cpuflag(avx)
+    punpckhqdq %1, %2, %2    ; 3-operand form: dst is written from src alone, dst is not an input
+%elif cpuflag(sse4)
+    pshufd     %1, %2, q3232 ; pshufd is slow on some older CPUs, so only use it on more modern ones (q3232: presumably the x86inc shuffle immediate picking dwords 3,2,3,2 -- confirm)
+%else
+    movhlps    %1, %2        ; may cause an int/float domain transition and has a dependency on dst
+%endif
+%endmacro