diff mbox

[FFmpeg-devel] avfilter: add hflip x86 SIMD

Message ID 20171201220243.16371-1-onemda@gmail.com
State Superseded
Headers show

Commit Message

Paul B Mahol Dec. 1, 2017, 10:02 p.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavfilter/hflip.h             | 38 +++++++++++++++++++++++++
 libavfilter/vf_hflip.c          | 30 ++++++++++++++------
 libavfilter/x86/Makefile        |  2 ++
 libavfilter/x86/vf_hflip.asm    | 61 +++++++++++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++
 5 files changed, 160 insertions(+), 9 deletions(-)
 create mode 100644 libavfilter/hflip.h
 create mode 100644 libavfilter/x86/vf_hflip.asm
 create mode 100644 libavfilter/x86/vf_hflip_init.c

Comments

Michael Niedermayer Dec. 2, 2017, 2:13 a.m. UTC | #1
On Fri, Dec 01, 2017 at 11:02:43PM +0100, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavfilter/hflip.h             | 38 +++++++++++++++++++++++++
>  libavfilter/vf_hflip.c          | 30 ++++++++++++++------
>  libavfilter/x86/Makefile        |  2 ++
>  libavfilter/x86/vf_hflip.asm    | 61 +++++++++++++++++++++++++++++++++++++++++
>  libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++
>  5 files changed, 160 insertions(+), 9 deletions(-)
>  create mode 100644 libavfilter/hflip.h
>  create mode 100644 libavfilter/x86/vf_hflip.asm
>  create mode 100644 libavfilter/x86/vf_hflip_init.c

Fails to build on x86-32 Linux:

libavfilter/libavfilter.a(vf_hflip_init.o): In function `ff_hflip_init_x86':
src/libavfilter/x86/vf_hflip_init.c:35: undefined reference to `ff_hflip_byte_ssse3'
collect2: error: ld returned 1 exit status
make: *** [ffmpeg_g] Error 1
make: *** Waiting for unfinished jobs....
libavfilter/libavfilter.a(vf_hflip_init.o): In function `ff_hflip_init_x86':
src/libavfilter/x86/vf_hflip_init.c:35: undefined reference to `ff_hflip_byte_ssse3'
collect2: error: ld returned 1 exit status
make: *** [ffprobe_g] Error 1


[...]
James Almer Dec. 2, 2017, 2:16 a.m. UTC | #2
On 12/1/2017 11:13 PM, Michael Niedermayer wrote:
> On Fri, Dec 01, 2017 at 11:02:43PM +0100, Paul B Mahol wrote:
>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> ---
>>  libavfilter/hflip.h             | 38 +++++++++++++++++++++++++
>>  libavfilter/vf_hflip.c          | 30 ++++++++++++++------
>>  libavfilter/x86/Makefile        |  2 ++
>>  libavfilter/x86/vf_hflip.asm    | 61 +++++++++++++++++++++++++++++++++++++++++
>>  libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++
>>  5 files changed, 160 insertions(+), 9 deletions(-)
>>  create mode 100644 libavfilter/hflip.h
>>  create mode 100644 libavfilter/x86/vf_hflip.asm
>>  create mode 100644 libavfilter/x86/vf_hflip_init.c
> 
> fails to build on x86-32 linux
> 
> libavfilter/libavfilter.a(vf_hflip_init.o): In function `ff_hflip_init_x86':
> src/libavfilter/x86/vf_hflip_init.c:35: undefined reference to `ff_hflip_byte_ssse3'
> collect2: error: ld returned 1 exit status
> make: *** [ffmpeg_g] Error 1
> make: *** Waiting for unfinished jobs....
> libavfilter/libavfilter.a(vf_hflip_init.o): In function `ff_hflip_init_x86':
> src/libavfilter/x86/vf_hflip_init.c:35: undefined reference to `ff_hflip_byte_ssse3'
> collect2: error: ld returned 1 exit status
> make: *** [ffprobe_g] Error 1

For some reason the whole asm function is wrapped in an x86_64 check even
though it's not needed.
I guess it was a copy-paste mistake.
James Almer Dec. 2, 2017, 2:25 a.m. UTC | #3
On 12/1/2017 7:02 PM, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavfilter/hflip.h             | 38 +++++++++++++++++++++++++
>  libavfilter/vf_hflip.c          | 30 ++++++++++++++------
>  libavfilter/x86/Makefile        |  2 ++
>  libavfilter/x86/vf_hflip.asm    | 61 +++++++++++++++++++++++++++++++++++++++++
>  libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++
>  5 files changed, 160 insertions(+), 9 deletions(-)
>  create mode 100644 libavfilter/hflip.h
>  create mode 100644 libavfilter/x86/vf_hflip.asm
>  create mode 100644 libavfilter/x86/vf_hflip_init.c
> 
> diff --git a/libavfilter/hflip.h b/libavfilter/hflip.h
> new file mode 100644
> index 0000000000..138380427c
> --- /dev/null
> +++ b/libavfilter/hflip.h
> @@ -0,0 +1,38 @@
> +/*
> + * Copyright (c) 2007 Benoit Fouet
> + * Copyright (c) 2010 Stefano Sabatini
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_HFLIP_H
> +#define AVFILTER_HFLIP_H
> +
> +#include "avfilter.h"
> +
> +typedef struct FlipContext {
> +    const AVClass *class;
> +    int max_step[4];    ///< max pixel step for each plane, expressed as a number of bytes
> +    int planewidth[4];  ///< width of each plane
> +    int planeheight[4]; ///< height of each plane
> +
> +    void (*flip_line[4])(const uint8_t *src, uint8_t *dst, int w);
> +} FlipContext;
> +
> +void ff_hflip_init_x86(FlipContext *s, int step[4]);
> +
> +#endif /* AVFILTER_HFLIP_H */
> diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c
> index cf20c193f7..65cf7c5cd1 100644
> --- a/libavfilter/vf_hflip.c
> +++ b/libavfilter/vf_hflip.c
> @@ -29,6 +29,7 @@
>  #include "libavutil/opt.h"
>  #include "avfilter.h"
>  #include "formats.h"
> +#include "hflip.h"
>  #include "internal.h"
>  #include "video.h"
>  #include "libavutil/pixdesc.h"
> @@ -36,13 +37,6 @@
>  #include "libavutil/intreadwrite.h"
>  #include "libavutil/imgutils.h"
>  
> -typedef struct FlipContext {
> -    const AVClass *class;
> -    int max_step[4];    ///< max pixel step for each plane, expressed as a number of bytes
> -    int planewidth[4];  ///< width of each plane
> -    int planeheight[4]; ///< height of each plane
> -} FlipContext;
> -
>  static const AVOption hflip_options[] = {
>      { NULL }
>  };
> @@ -67,12 +61,21 @@ static int query_formats(AVFilterContext *ctx)
>      return ff_set_common_formats(ctx, pix_fmts);
>  }
>  
> +static void hflip_byte_c(const uint8_t *src, uint8_t *dst, int w)
> +{
> +    int j;
> +
> +    for (j = 0; j < w; j++)
> +        dst[j] = src[-j];
> +}
> +
>  static int config_props(AVFilterLink *inlink)
>  {
>      FlipContext *s = inlink->dst->priv;
>      const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
>      const int hsub = pix_desc->log2_chroma_w;
>      const int vsub = pix_desc->log2_chroma_h;
> +    int i;
>  
>      av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
>      s->planewidth[0]  = s->planewidth[3]  = inlink->w;
> @@ -80,6 +83,16 @@ static int config_props(AVFilterLink *inlink)
>      s->planeheight[0] = s->planeheight[3] = inlink->h;
>      s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
>  
> +    for (i = 0; i < 4; i++) {
> +        switch (s->max_step[i]) {
> +        case 1:
> +            s->flip_line[i] = hflip_byte_c;
> +        }
> +    }
> +
> +    if (ARCH_X86)
> +        ff_hflip_init_x86(s, s->max_step);
> +
>      return 0;
>  }
>  
> @@ -109,8 +122,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs)
>          for (i = start; i < end; i++) {
>              switch (step) {
>              case 1:
> -                for (j = 0; j < width; j++)
> -                    outrow[j] = inrow[-j];
> +                s->flip_line[plane](inrow, outrow, width);
>              break;
>  
>              case 2:
> diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
> index 3431625883..1420954f62 100644
> --- a/libavfilter/x86/Makefile
> +++ b/libavfilter/x86/Makefile
> @@ -5,6 +5,7 @@ OBJS-$(CONFIG_COLORSPACE_FILTER)             += x86/colorspacedsp_init.o
>  OBJS-$(CONFIG_EQ_FILTER)                     += x86/vf_eq.o
>  OBJS-$(CONFIG_FSPP_FILTER)                   += x86/vf_fspp_init.o
>  OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o
> +OBJS-$(CONFIG_HFLIP_FILTER)                  += x86/vf_hflip_init.o
>  OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
>  OBJS-$(CONFIG_IDET_FILTER)                   += x86/vf_idet_init.o
>  OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_interlace_init.o
> @@ -31,6 +32,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)           += x86/vf_bwdif.o
>  X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)      += x86/colorspacedsp.o
>  X86ASM-OBJS-$(CONFIG_FSPP_FILTER)            += x86/vf_fspp.o
>  X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER)         += x86/vf_gradfun.o
> +X86ASM-OBJS-$(CONFIG_HFLIP_FILTER)           += x86/vf_hflip.o
>  X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER)          += x86/vf_hqdn3d.o
>  X86ASM-OBJS-$(CONFIG_IDET_FILTER)            += x86/vf_idet.o
>  X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER)       += x86/vf_interlace.o
> diff --git a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm
> new file mode 100644
> index 0000000000..bc52a16ad8
> --- /dev/null
> +++ b/libavfilter/x86/vf_hflip.asm
> @@ -0,0 +1,61 @@
> +;*****************************************************************************
> +;* x86-optimized functions for hflip filter
> +;*
> +;* Copyright (C) 2017 Paul B Mahol
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with FFmpeg; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +;*****************************************************************************
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +SECTION_RODATA
> +
> +pb_flip: times 16 db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
> +
> +SECTION .text
> +
> +INIT_XMM ssse3
> +%if ARCH_X86_64

Unneeded.

> +cglobal hflip_byte, 5, 5, 8, src, dst, w, x, v

There are three arguments, not five. Also, only two xmm regs are being used.

> +    mova    m0, [pb_flip]
> +    mov     xq, 0
> +    sub     wq, mmsize
> +    cmp     wq, mmsize
> +    jl .skip
> +
> +    .loop0:
> +        neg     xq
> +        movu    m1, [srcq + xq - mmsize + 1]
> +        pshufb  m1, m0
> +        neg     xq
> +        movu    [dstq + xq], m1
> +        add     xq, mmsize
> +        cmp     xq, wq
> +        jl .loop0
> +
> +.skip:
> +    add     wq, mmsize
> +    .loop1:
> +        neg    xq
> +        mov    vb, [srcq + xq]
> +        neg    xq
> +        mov    [dstq + xq], vb
> +        add    xq, 1
> +        cmp    xq, wq
> +        jl .loop1
> +RET
> +%endif

No comments about the assembly. Rostislav mentioned on IRC you can do it
in a more efficient way, so poke him about it.
diff mbox

Patch

diff --git a/libavfilter/hflip.h b/libavfilter/hflip.h
new file mode 100644
index 0000000000..138380427c
--- /dev/null
+++ b/libavfilter/hflip.h
@@ -0,0 +1,38 @@ 
+/*
+ * Copyright (c) 2007 Benoit Fouet
+ * Copyright (c) 2010 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_HFLIP_H
+#define AVFILTER_HFLIP_H
+
+#include "avfilter.h"
+
+typedef struct FlipContext {
+    const AVClass *class;
+    int max_step[4];    ///< max pixel step for each plane, expressed as a number of bytes
+    int planewidth[4];  ///< width of each plane
+    int planeheight[4]; ///< height of each plane
+    /** row flip routine for each plane, e.g. hflip_byte_c: dst[j] = src[-j] for 0 <= j < w */
+    void (*flip_line[4])(const uint8_t *src, uint8_t *dst, int w);
+} FlipContext;
+
+void ff_hflip_init_x86(FlipContext *s, int step[4]);
+
+#endif /* AVFILTER_HFLIP_H */
diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c
index cf20c193f7..65cf7c5cd1 100644
--- a/libavfilter/vf_hflip.c
+++ b/libavfilter/vf_hflip.c
@@ -29,6 +29,7 @@ 
 #include "libavutil/opt.h"
 #include "avfilter.h"
 #include "formats.h"
+#include "hflip.h"
 #include "internal.h"
 #include "video.h"
 #include "libavutil/pixdesc.h"
@@ -36,13 +37,6 @@ 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
 
-typedef struct FlipContext {
-    const AVClass *class;
-    int max_step[4];    ///< max pixel step for each plane, expressed as a number of bytes
-    int planewidth[4];  ///< width of each plane
-    int planeheight[4]; ///< height of each plane
-} FlipContext;
-
 static const AVOption hflip_options[] = {
     { NULL }
 };
@@ -67,12 +61,21 @@  static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, pix_fmts);
 }
 
+/* Mirror w bytes: dst[k] receives src[-k] for every k in [0, w). */
+static void hflip_byte_c(const uint8_t *src, uint8_t *dst, int w)
+{
+    int k = 0;
+    for (; k < w; k++)
+        dst[k] = *(src - k);
+}
+
 static int config_props(AVFilterLink *inlink)
 {
     FlipContext *s = inlink->dst->priv;
     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
     const int hsub = pix_desc->log2_chroma_w;
     const int vsub = pix_desc->log2_chroma_h;
+    int i;
 
     av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
     s->planewidth[0]  = s->planewidth[3]  = inlink->w;
@@ -80,6 +83,16 @@  static int config_props(AVFilterLink *inlink)
     s->planeheight[0] = s->planeheight[3] = inlink->h;
     s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
 
+    for (i = 0; i < 4; i++) {
+        switch (s->max_step[i]) {
+        case 1:
+            s->flip_line[i] = hflip_byte_c;
+        }
+    }
+
+    if (ARCH_X86)
+        ff_hflip_init_x86(s, s->max_step);
+
     return 0;
 }
 
@@ -109,8 +122,7 @@  static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs)
         for (i = start; i < end; i++) {
             switch (step) {
             case 1:
-                for (j = 0; j < width; j++)
-                    outrow[j] = inrow[-j];
+                s->flip_line[plane](inrow, outrow, width);
             break;
 
             case 2:
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 3431625883..1420954f62 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -5,6 +5,7 @@  OBJS-$(CONFIG_COLORSPACE_FILTER)             += x86/colorspacedsp_init.o
 OBJS-$(CONFIG_EQ_FILTER)                     += x86/vf_eq.o
 OBJS-$(CONFIG_FSPP_FILTER)                   += x86/vf_fspp_init.o
 OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o
+OBJS-$(CONFIG_HFLIP_FILTER)                  += x86/vf_hflip_init.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
 OBJS-$(CONFIG_IDET_FILTER)                   += x86/vf_idet_init.o
 OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_interlace_init.o
@@ -31,6 +32,7 @@  X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)           += x86/vf_bwdif.o
 X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)      += x86/colorspacedsp.o
 X86ASM-OBJS-$(CONFIG_FSPP_FILTER)            += x86/vf_fspp.o
 X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER)         += x86/vf_gradfun.o
+X86ASM-OBJS-$(CONFIG_HFLIP_FILTER)           += x86/vf_hflip.o
 X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER)          += x86/vf_hqdn3d.o
 X86ASM-OBJS-$(CONFIG_IDET_FILTER)            += x86/vf_idet.o
 X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER)       += x86/vf_interlace.o
diff --git a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm
new file mode 100644
index 0000000000..bc52a16ad8
--- /dev/null
+++ b/libavfilter/x86/vf_hflip.asm
@@ -0,0 +1,61 @@ 
+;*****************************************************************************
+;* x86-optimized functions for hflip filter
+;*
+;* Copyright (C) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pb_flip: db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ; pshufb mask reversing 16 bytes; one copy suffices
+
+SECTION .text
+
+INIT_XMM ssse3
+%if ARCH_X86_64 ; symbol exists only on x86-64 builds; C callers must guard their reference
+cglobal hflip_byte, 3, 5, 2, src, dst, w, x, v ; 3 args (src, dst, w), 5 GPRs, 2 XMM regs (m0, m1)
+    mova    m0, [pb_flip]          ; byte-reversal shuffle mask
+    movsxdifnidn wq, wd            ; w is a 32-bit int: sign-extend before using it as wq
+    xor     xq, xq                 ; x = 0, offset into dst
+    sub     wq, mmsize
+    cmp     wq, mmsize
+    jl .skip                       ; fewer than 2*mmsize bytes: scalar loop only
+    .loop0:
+        neg     xq
+        movu    m1, [srcq + xq - mmsize + 1] ; the mmsize bytes ending at src[-x]
+        pshufb  m1, m0                       ; reverse their order
+        neg     xq
+        movu    [dstq + xq], m1
+        add     xq, mmsize
+        cmp     xq, wq
+        jl .loop0
+
+.skip:
+    add     wq, mmsize             ; restore the full width for the scalar tail
+    .loop1:
+        neg    xq
+        mov    vb, [srcq + xq]     ; dst[x] = src[-x], one byte at a time
+        neg    xq
+        mov    [dstq + xq], vb
+        add    xq, 1
+        cmp    xq, wq
+        jl .loop1
+RET
+%endif
diff --git a/libavfilter/x86/vf_hflip_init.c b/libavfilter/x86/vf_hflip_init.c
new file mode 100644
index 0000000000..cd0e18f7ee
--- /dev/null
+++ b/libavfilter/x86/vf_hflip_init.c
@@ -0,0 +1,38 @@ 
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/hflip.h"
+
+void ff_hflip_byte_ssse3(const uint8_t *src, uint8_t *dst, int w);
+
+av_cold void ff_hflip_init_x86(FlipContext *s, int step[4])
+{
+    int cpu_flags = av_get_cpu_flags();
+    int i;
+    /* Install the SSSE3 row flipper on every plane whose pixel step is one byte. */
+    for (i = 0; i < 4; i++) {
+        /* ff_hflip_byte_ssse3 is only assembled when ARCH_X86_64 is set. */
+        if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && step[i] == 1)
+            s->flip_line[i] = ff_hflip_byte_ssse3;
+    }
+}