Message ID | 20171201220243.16371-1-onemda@gmail.com |
---|---|
State | Superseded |
Headers | show |
On Fri, Dec 01, 2017 at 11:02:43PM +0100, Paul B Mahol wrote: > Signed-off-by: Paul B Mahol <onemda@gmail.com> > --- > libavfilter/hflip.h | 38 +++++++++++++++++++++++++ > libavfilter/vf_hflip.c | 30 ++++++++++++++------ > libavfilter/x86/Makefile | 2 ++ > libavfilter/x86/vf_hflip.asm | 61 +++++++++++++++++++++++++++++++++++++++++ > libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++ > 5 files changed, 160 insertions(+), 9 deletions(-) > create mode 100644 libavfilter/hflip.h > create mode 100644 libavfilter/x86/vf_hflip.asm > create mode 100644 libavfilter/x86/vf_hflip_init.c fails to build on x86-32 linux libavfilter/libavfilter.a(vf_hflip_init.o): In function `ff_hflip_init_x86': src/libavfilter/x86/vf_hflip_init.c:35: undefined reference to `ff_hflip_byte_ssse3' collect2: error: ld returned 1 exit status make: *** [ffmpeg_g] Error 1 make: *** Waiting for unfinished jobs.... libavfilter/libavfilter.a(vf_hflip_init.o): In function `ff_hflip_init_x86': src/libavfilter/x86/vf_hflip_init.c:35: undefined reference to `ff_hflip_byte_ssse3' collect2: error: ld returned 1 exit status make: *** [ffprobe_g] Error 1 [...]
On 12/1/2017 11:13 PM, Michael Niedermayer wrote: > On Fri, Dec 01, 2017 at 11:02:43PM +0100, Paul B Mahol wrote: >> Signed-off-by: Paul B Mahol <onemda@gmail.com> >> --- >> libavfilter/hflip.h | 38 +++++++++++++++++++++++++ >> libavfilter/vf_hflip.c | 30 ++++++++++++++------ >> libavfilter/x86/Makefile | 2 ++ >> libavfilter/x86/vf_hflip.asm | 61 +++++++++++++++++++++++++++++++++++++++++ >> libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++ >> 5 files changed, 160 insertions(+), 9 deletions(-) >> create mode 100644 libavfilter/hflip.h >> create mode 100644 libavfilter/x86/vf_hflip.asm >> create mode 100644 libavfilter/x86/vf_hflip_init.c > > fails to build on x86-32 linux > > libavfilter/libavfilter.a(vf_hflip_init.o): In function `ff_hflip_init_x86': > src/libavfilter/x86/vf_hflip_init.c:35: undefined reference to `ff_hflip_byte_ssse3' > collect2: error: ld returned 1 exit status > make: *** [ffmpeg_g] Error 1 > make: *** Waiting for unfinished jobs.... > libavfilter/libavfilter.a(vf_hflip_init.o): In function `ff_hflip_init_x86': > src/libavfilter/x86/vf_hflip_init.c:35: undefined reference to `ff_hflip_byte_ssse3' > collect2: error: ld returned 1 exit status > make: *** [ffprobe_g] Error 1 For some reason the whole asm function is wrapped in an x86_64 check (`%if ARCH_X86_64` in vf_hflip.asm) even though it's not needed, so on x86-32 ff_hflip_byte_ssse3 is never assembled while vf_hflip_init.c still references it. Guess it was a copy-paste mistake.
On 12/1/2017 7:02 PM, Paul B Mahol wrote: > Signed-off-by: Paul B Mahol <onemda@gmail.com> > --- > libavfilter/hflip.h | 38 +++++++++++++++++++++++++ > libavfilter/vf_hflip.c | 30 ++++++++++++++------ > libavfilter/x86/Makefile | 2 ++ > libavfilter/x86/vf_hflip.asm | 61 +++++++++++++++++++++++++++++++++++++++++ > libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++ > 5 files changed, 160 insertions(+), 9 deletions(-) > create mode 100644 libavfilter/hflip.h > create mode 100644 libavfilter/x86/vf_hflip.asm > create mode 100644 libavfilter/x86/vf_hflip_init.c > > diff --git a/libavfilter/hflip.h b/libavfilter/hflip.h > new file mode 100644 > index 0000000000..138380427c > --- /dev/null > +++ b/libavfilter/hflip.h > @@ -0,0 +1,38 @@ > +/* > + * Copyright (c) 2007 Benoit Fouet > + * Copyright (c) 2010 Stefano Sabatini > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#ifndef AVFILTER_HFLIP_H > +#define AVFILTER_HFLIP_H > + > +#include "avfilter.h" > + > +typedef struct FlipContext { > + const AVClass *class; > + int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes > + int planewidth[4]; ///< width of each plane > + int planeheight[4]; ///< height of each plane > + > + void (*flip_line[4])(const uint8_t *src, uint8_t *dst, int w); > +} FlipContext; > + > +void ff_hflip_init_x86(FlipContext *s, int step[4]); > + > +#endif /* AVFILTER_HFLIP_H */ > diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c > index cf20c193f7..65cf7c5cd1 100644 > --- a/libavfilter/vf_hflip.c > +++ b/libavfilter/vf_hflip.c > @@ -29,6 +29,7 @@ > #include "libavutil/opt.h" > #include "avfilter.h" > #include "formats.h" > +#include "hflip.h" > #include "internal.h" > #include "video.h" > #include "libavutil/pixdesc.h" > @@ -36,13 +37,6 @@ > #include "libavutil/intreadwrite.h" > #include "libavutil/imgutils.h" > > -typedef struct FlipContext { > - const AVClass *class; > - int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes > - int planewidth[4]; ///< width of each plane > - int planeheight[4]; ///< height of each plane > -} FlipContext; > - > static const AVOption hflip_options[] = { > { NULL } > }; > @@ -67,12 +61,21 @@ static int query_formats(AVFilterContext *ctx) > return ff_set_common_formats(ctx, pix_fmts); > } > > +static void hflip_byte_c(const uint8_t *src, uint8_t *dst, int w) > +{ > + int j; > + > + for (j = 0; j < w; j++) > + dst[j] = src[-j]; > +} > + > static int config_props(AVFilterLink *inlink) > { > FlipContext *s = inlink->dst->priv; > const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format); > const int hsub = 
pix_desc->log2_chroma_w; > const int vsub = pix_desc->log2_chroma_h; > + int i; > > av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc); > s->planewidth[0] = s->planewidth[3] = inlink->w; > @@ -80,6 +83,16 @@ static int config_props(AVFilterLink *inlink) > s->planeheight[0] = s->planeheight[3] = inlink->h; > s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub); > > + for (i = 0; i < 4; i++) { > + switch (s->max_step[i]) { > + case 1: > + s->flip_line[i] = hflip_byte_c; > + } > + } > + > + if (ARCH_X86) > + ff_hflip_init_x86(s, s->max_step); > + > return 0; > } > > @@ -109,8 +122,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs) > for (i = start; i < end; i++) { > switch (step) { > case 1: > - for (j = 0; j < width; j++) > - outrow[j] = inrow[-j]; > + s->flip_line[plane](inrow, outrow, width); > break; > > case 2: > diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile > index 3431625883..1420954f62 100644 > --- a/libavfilter/x86/Makefile > +++ b/libavfilter/x86/Makefile > @@ -5,6 +5,7 @@ OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o > OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o > OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o > OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o > +OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip_init.o > OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o > OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o > OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace_init.o > @@ -31,6 +32,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o > X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o > X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o > X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o > +X86ASM-OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip.o > X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o > X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o > X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o > diff --git 
a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm > new file mode 100644 > index 0000000000..bc52a16ad8 > --- /dev/null > +++ b/libavfilter/x86/vf_hflip.asm > @@ -0,0 +1,61 @@ > +;***************************************************************************** > +;* x86-optimized functions for hflip filter > +;* > +;* Copyright (C) 2017 Paul B Mahol > +;* > +;* This file is part of FFmpeg. > +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. > +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > +;***************************************************************************** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION_RODATA > + > +pb_flip: times 16 db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 > + > +SECTION .text > + > +INIT_XMM ssse3 > +%if ARCH_X86_64 Unneeded. > +cglobal hflip_byte, 5, 5, 8, src, dst, w, x, v There are three arguments, not five. Also, only two xmm regs are being used. 
> + mova m0, [pb_flip] > + mov xq, 0 > + sub wq, mmsize > + cmp wq, mmsize > + jl .skip > + > + .loop0: > + neg xq > + movu m1, [srcq + xq - mmsize + 1] > + pshufb m1, m0 > + neg xq > + movu [dstq + xq], m1 > + add xq, mmsize > + cmp xq, wq > + jl .loop0 > + > +.skip: > + add wq, mmsize > + .loop1: > + neg xq > + mov vb, [srcq + xq] > + neg xq > + mov [dstq + xq], vb > + add xq, 1 > + cmp xq, wq > + jl .loop1 > +RET > +%endif No comments about the assembly. Rostislav mentioned on IRC you can do it in a more efficient way, so poke him about it.
diff --git a/libavfilter/hflip.h b/libavfilter/hflip.h new file mode 100644 index 0000000000..138380427c --- /dev/null +++ b/libavfilter/hflip.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2007 Benoit Fouet + * Copyright (c) 2010 Stefano Sabatini + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_HFLIP_H +#define AVFILTER_HFLIP_H + +#include "avfilter.h" + +typedef struct FlipContext { + const AVClass *class; + int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes + int planewidth[4]; ///< width of each plane + int planeheight[4]; ///< height of each plane + + void (*flip_line[4])(const uint8_t *src, uint8_t *dst, int w); +} FlipContext; + +void ff_hflip_init_x86(FlipContext *s, int step[4]); + +#endif /* AVFILTER_HFLIP_H */ diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c index cf20c193f7..65cf7c5cd1 100644 --- a/libavfilter/vf_hflip.c +++ b/libavfilter/vf_hflip.c @@ -29,6 +29,7 @@ #include "libavutil/opt.h" #include "avfilter.h" #include "formats.h" +#include "hflip.h" #include "internal.h" #include "video.h" #include "libavutil/pixdesc.h" @@ -36,13 +37,6 @@ #include "libavutil/intreadwrite.h" #include "libavutil/imgutils.h" -typedef struct FlipContext { - const AVClass *class; - int 
max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes - int planewidth[4]; ///< width of each plane - int planeheight[4]; ///< height of each plane -} FlipContext; - static const AVOption hflip_options[] = { { NULL } }; @@ -67,12 +61,21 @@ static int query_formats(AVFilterContext *ctx) return ff_set_common_formats(ctx, pix_fmts); } +static void hflip_byte_c(const uint8_t *src, uint8_t *dst, int w) +{ + int j; + + for (j = 0; j < w; j++) + dst[j] = src[-j]; +} + static int config_props(AVFilterLink *inlink) { FlipContext *s = inlink->dst->priv; const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format); const int hsub = pix_desc->log2_chroma_w; const int vsub = pix_desc->log2_chroma_h; + int i; av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc); s->planewidth[0] = s->planewidth[3] = inlink->w; @@ -80,6 +83,16 @@ static int config_props(AVFilterLink *inlink) s->planeheight[0] = s->planeheight[3] = inlink->h; s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub); + for (i = 0; i < 4; i++) { + switch (s->max_step[i]) { + case 1: + s->flip_line[i] = hflip_byte_c; + } + } + + if (ARCH_X86) + ff_hflip_init_x86(s, s->max_step); + return 0; } @@ -109,8 +122,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs) for (i = start; i < end; i++) { switch (step) { case 1: - for (j = 0; j < width; j++) - outrow[j] = inrow[-j]; + s->flip_line[plane](inrow, outrow, width); break; case 2: diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index 3431625883..1420954f62 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -5,6 +5,7 @@ OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o +OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip_init.o OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o 
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace_init.o @@ -31,6 +32,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o +X86ASM-OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip.o X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o diff --git a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm new file mode 100644 index 0000000000..bc52a16ad8 --- /dev/null +++ b/libavfilter/x86/vf_hflip.asm @@ -0,0 +1,61 @@ +;***************************************************************************** +;* x86-optimized functions for hflip filter +;* +;* Copyright (C) 2017 Paul B Mahol +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;***************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +pb_flip: times 16 db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 + +SECTION .text + +INIT_XMM ssse3 +%if ARCH_X86_64 +cglobal hflip_byte, 5, 5, 8, src, dst, w, x, v + mova m0, [pb_flip] + mov xq, 0 + sub wq, mmsize + cmp wq, mmsize + jl .skip + + .loop0: + neg xq + movu m1, [srcq + xq - mmsize + 1] + pshufb m1, m0 + neg xq + movu [dstq + xq], m1 + add xq, mmsize + cmp xq, wq + jl .loop0 + +.skip: + add wq, mmsize + .loop1: + neg xq + mov vb, [srcq + xq] + neg xq + mov [dstq + xq], vb + add xq, 1 + cmp xq, wq + jl .loop1 +RET +%endif diff --git a/libavfilter/x86/vf_hflip_init.c b/libavfilter/x86/vf_hflip_init.c new file mode 100644 index 0000000000..cd0e18f7ee --- /dev/null +++ b/libavfilter/x86/vf_hflip_init.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavfilter/hflip.h" + +void ff_hflip_byte_ssse3(const uint8_t *src, uint8_t *dst, int w); + +av_cold void ff_hflip_init_x86(FlipContext *s, int step[4]) +{ + int cpu_flags = av_get_cpu_flags(); + int i; + + for (i = 0; i < 4; i++) { + if (EXTERNAL_SSSE3(cpu_flags) && step[i] == 1) { + s->flip_line[i] = ff_hflip_byte_ssse3; + } + } +}
Signed-off-by: Paul B Mahol <onemda@gmail.com> --- libavfilter/hflip.h | 38 +++++++++++++++++++++++++ libavfilter/vf_hflip.c | 30 ++++++++++++++------ libavfilter/x86/Makefile | 2 ++ libavfilter/x86/vf_hflip.asm | 61 +++++++++++++++++++++++++++++++++++++++++ libavfilter/x86/vf_hflip_init.c | 38 +++++++++++++++++++++++++ 5 files changed, 160 insertions(+), 9 deletions(-) create mode 100644 libavfilter/hflip.h create mode 100644 libavfilter/x86/vf_hflip.asm create mode 100644 libavfilter/x86/vf_hflip_init.c