Message ID | 20211104041841.95318-1-jianhua.wu@intel.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,v2,1/3] avfilter/x86/vf_exposure: add x86 SIMD optimization | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_ppc | success | Make finished |
andriy/make_fate_ppc | success | Make fate finished |
Ping. > From: Wu, Jianhua <jianhua.wu@intel.com> > Sent: Thursday, November 4, 2021 12:19 PM > To: ffmpeg-devel@ffmpeg.org > Cc: Wu, Jianhua <jianhua.wu@intel.com> > Subject: [PATCH v2 1/3] avfilter/x86/vf_exposure: add x86 SIMD optimization > > Performance data(Less is better): > exposure_c: 857394 > exposure_sse: 327589 > > Signed-off-by: Wu Jianhua <jianhua.wu@intel.com> > --- > libavfilter/exposure.h | 36 +++++++++++++++++++ > libavfilter/vf_exposure.c | 36 +++++++++---------- > libavfilter/x86/Makefile | 2 ++ > libavfilter/x86/vf_exposure.asm | 55 > ++++++++++++++++++++++++++++++ > libavfilter/x86/vf_exposure_init.c | 36 +++++++++++++++++++ > 5 files changed, 147 insertions(+), 18 deletions(-) create mode 100644 > libavfilter/exposure.h create mode 100644 libavfilter/x86/vf_exposure.asm > create mode 100644 libavfilter/x86/vf_exposure_init.c > > diff --git a/libavfilter/exposure.h b/libavfilter/exposure.h new file mode > 100644 index 0000000000..e76a517826 > --- /dev/null > +++ b/libavfilter/exposure.h > @@ -0,0 +1,36 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > +02110-1301 USA */ > + > +#ifndef AVFILTER_EXPOSURE_H > +#define AVFILTER_EXPOSURE_H > +#include "avfilter.h" > + > +typedef struct ExposureContext { > + const AVClass *class; > + > + float exposure; > + float black; > + float scale; > + > + void (*exposure_func)(float *ptr, int length, float black, float > +scale); } ExposureContext; > + > +void ff_exposure_init(ExposureContext *s); void > +ff_exposure_init_x86(ExposureContext *s); > + > +#endif > diff --git a/libavfilter/vf_exposure.c b/libavfilter/vf_exposure.c index > 108fba7930..045ae710d3 100644 > --- a/libavfilter/vf_exposure.c > +++ b/libavfilter/vf_exposure.c > @@ -26,23 +26,20 @@ > #include "formats.h" > #include "internal.h" > #include "video.h" > +#include "exposure.h" > > -typedef struct ExposureContext { > - const AVClass *class; > - > - float exposure; > - float black; > +static void exposure_c(float *ptr, int length, float black, float > +scale) { > + int i; > > - float scale; > - int (*do_slice)(AVFilterContext *s, void *arg, > - int jobnr, int nb_jobs); > -} ExposureContext; > + for (i = 0; i < length; i++) > + ptr[i] = (ptr[i] - black) * scale; } > > static int exposure_slice(AVFilterContext *ctx, void *arg, int jobnr, int > nb_jobs) { > ExposureContext *s = ctx->priv; > AVFrame *frame = arg; > - const int width = frame->width; > const int height = frame->height; > const int slice_start = (height * jobnr) / nb_jobs; > const int slice_end = (height * (jobnr + 1)) / nb_jobs; @@ -52,24 +49,27 > @@ static int exposure_slice(AVFilterContext *ctx, void *arg, int jobnr, int > nb_job > for (int p = 0; p < 3; p++) { > const int linesize = frame->linesize[p] / 4; > float *ptr = (float *)frame->data[p] + slice_start * linesize; > - for (int y = slice_start; y < slice_end; y++) { > - for (int x = 
0; x < width; x++) > - ptr[x] = (ptr[x] - black) * scale; > - > - ptr += linesize; > - } > + s->exposure_func(ptr, linesize * (slice_end - slice_start), > + black, scale); > } > > return 0; > } > > +void ff_exposure_init(ExposureContext *s) { > + s->exposure_func = exposure_c; > + > + if (ARCH_X86) > + ff_exposure_init_x86(s); > +} > + > static int filter_frame(AVFilterLink *inlink, AVFrame *frame) { > AVFilterContext *ctx = inlink->dst; > ExposureContext *s = ctx->priv; > > s->scale = 1.f / (exp2f(-s->exposure) - s->black); > - ff_filter_execute(ctx, s->do_slice, frame, NULL, > + ff_filter_execute(ctx, exposure_slice, frame, NULL, > FFMIN(frame->height, ff_filter_get_nb_threads(ctx))); > > return ff_filter_frame(ctx->outputs[0], frame); @@ -80,7 +80,7 @@ static > av_cold int config_input(AVFilterLink *inlink) > AVFilterContext *ctx = inlink->dst; > ExposureContext *s = ctx->priv; > > - s->do_slice = exposure_slice; > + ff_exposure_init(s); > > return 0; > } > diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index > a29941eaeb..e84a388aa5 100644 > --- a/libavfilter/x86/Makefile > +++ b/libavfilter/x86/Makefile > @@ -8,6 +8,7 @@ OBJS-$(CONFIG_BWDIF_FILTER) += > x86/vf_bwdif_init.o > OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o > OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o > OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o > +OBJS-$(CONFIG_EXPOSURE_FILTER) += x86/vf_exposure_init.o > OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o > OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o > OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o > @@ -49,6 +50,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += > x86/vf_bwdif.o > X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o > X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += > x86/vf_convolution.o > X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o > +X86ASM-OBJS-$(CONFIG_EXPOSURE_FILTER) += x86/vf_exposure.o > X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o > 
X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o > X86ASM-OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur.o > diff --git a/libavfilter/x86/vf_exposure.asm > b/libavfilter/x86/vf_exposure.asm new file mode 100644 index > 0000000000..3351c6fb3b > --- /dev/null > +++ b/libavfilter/x86/vf_exposure.asm > @@ -0,0 +1,55 @@ > +;********************************************************* > ************* > +******* > +;* x86-optimized functions for exposure filter > +;* > +;* This file is part of FFmpeg. > +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > +;* Lesser General Public License for more details. 
> +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > +02110-1301 USA > +;********************************************************* > ************* > +******** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION .text > + > +;********************************************************* > ************* > +********* ; void ff_exposure(float *ptr, int length, float black, float > +scale); > +;********************************************************* > ************* > +********* > +%macro EXPOSURE 0 > +cglobal exposure, 2, 2, 4, ptr, length, black, scale > + movsxdifnidn lengthq, lengthd > +%if WIN64 > + VBROADCASTSS m0, xmm2 > + VBROADCASTSS m1, xmm3 > +%else > + VBROADCASTSS m0, xmm0 > + VBROADCASTSS m1, xmm1 > +%endif > + > +.loop: > + movu m2, [ptrq] > + subps m2, m2, m0 > + mulps m2, m2, m1 > + movu [ptrq], m2 > + add ptrq, mmsize > + sub lengthq, mmsize/4 > + > + jg .loop > + > + RET > +%endmacro > + > +%if ARCH_X86_64 > +INIT_XMM sse > +EXPOSURE > +%endif > diff --git a/libavfilter/x86/vf_exposure_init.c > b/libavfilter/x86/vf_exposure_init.c > new file mode 100644 > index 0000000000..de1b360f6c > --- /dev/null > +++ b/libavfilter/x86/vf_exposure_init.c > @@ -0,0 +1,36 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > +02110-1301 USA */ > + > +#include "config.h" > + > +#include "libavutil/attributes.h" > +#include "libavutil/cpu.h" > +#include "libavutil/x86/cpu.h" > +#include "libavfilter/exposure.h" > + > +void ff_exposure_sse(float *ptr, int length, float black, float scale); > + > +av_cold void ff_exposure_init_x86(ExposureContext *s) { > + int cpu_flags = av_get_cpu_flags(); > + > +#if ARCH_X86_64 > + if (EXTERNAL_SSE(cpu_flags)) > + s->exposure_func = ff_exposure_sse; #endif } > -- > 2.17.1
Will apply soon.
On 11/4/2021 1:18 AM, Wu Jianhua wrote: > Performance data(Less is better): > exposure_c: 857394 > exposure_sse: 327589 > > Signed-off-by: Wu Jianhua <jianhua.wu@intel.com> > --- > libavfilter/exposure.h | 36 +++++++++++++++++++ > libavfilter/vf_exposure.c | 36 +++++++++---------- > libavfilter/x86/Makefile | 2 ++ > libavfilter/x86/vf_exposure.asm | 55 ++++++++++++++++++++++++++++++ > libavfilter/x86/vf_exposure_init.c | 36 +++++++++++++++++++ > 5 files changed, 147 insertions(+), 18 deletions(-) > create mode 100644 libavfilter/exposure.h > create mode 100644 libavfilter/x86/vf_exposure.asm > create mode 100644 libavfilter/x86/vf_exposure_init.c > > diff --git a/libavfilter/exposure.h b/libavfilter/exposure.h > new file mode 100644 > index 0000000000..e76a517826 > --- /dev/null > +++ b/libavfilter/exposure.h > @@ -0,0 +1,36 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#ifndef AVFILTER_EXPOSURE_H > +#define AVFILTER_EXPOSURE_H > +#include "avfilter.h" > + > +typedef struct ExposureContext { > + const AVClass *class; > + > + float exposure; > + float black; > + float scale; > + > + void (*exposure_func)(float *ptr, int length, float black, float scale); > +} ExposureContext; > + > +void ff_exposure_init(ExposureContext *s); > +void ff_exposure_init_x86(ExposureContext *s); > + > +#endif > diff --git a/libavfilter/vf_exposure.c b/libavfilter/vf_exposure.c > index 108fba7930..045ae710d3 100644 > --- a/libavfilter/vf_exposure.c > +++ b/libavfilter/vf_exposure.c > @@ -26,23 +26,20 @@ > #include "formats.h" > #include "internal.h" > #include "video.h" > +#include "exposure.h" > > -typedef struct ExposureContext { > - const AVClass *class; > - > - float exposure; > - float black; > +static void exposure_c(float *ptr, int length, float black, float scale) > +{ > + int i; > > - float scale; > - int (*do_slice)(AVFilterContext *s, void *arg, > - int jobnr, int nb_jobs); > -} ExposureContext; > + for (i = 0; i < length; i++) > + ptr[i] = (ptr[i] - black) * scale; > +} > > static int exposure_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) > { > ExposureContext *s = ctx->priv; > AVFrame *frame = arg; > - const int width = frame->width; > const int height = frame->height; > const int slice_start = (height * jobnr) / nb_jobs; > const int slice_end = (height * (jobnr + 1)) / nb_jobs; > @@ -52,24 +49,27 @@ static int exposure_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_job > for (int p = 0; p < 3; p++) { > const int linesize = frame->linesize[p] / 4; > float *ptr = (float *)frame->data[p] + slice_start * linesize; > - for (int y = slice_start; y < slice_end; y++) { > - for (int x 
= 0; x < width; x++) > - ptr[x] = (ptr[x] - black) * scale; > - > - ptr += linesize; > - } > + s->exposure_func(ptr, linesize * (slice_end - slice_start), black, scale); > } > > return 0; > } > > +void ff_exposure_init(ExposureContext *s) > +{ > + s->exposure_func = exposure_c; > + > + if (ARCH_X86) > + ff_exposure_init_x86(s); > +} > + > static int filter_frame(AVFilterLink *inlink, AVFrame *frame) > { > AVFilterContext *ctx = inlink->dst; > ExposureContext *s = ctx->priv; > > s->scale = 1.f / (exp2f(-s->exposure) - s->black); > - ff_filter_execute(ctx, s->do_slice, frame, NULL, > + ff_filter_execute(ctx, exposure_slice, frame, NULL, > FFMIN(frame->height, ff_filter_get_nb_threads(ctx))); > > return ff_filter_frame(ctx->outputs[0], frame); > @@ -80,7 +80,7 @@ static av_cold int config_input(AVFilterLink *inlink) > AVFilterContext *ctx = inlink->dst; > ExposureContext *s = ctx->priv; > > - s->do_slice = exposure_slice; > + ff_exposure_init(s); > > return 0; > } > diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile > index a29941eaeb..e84a388aa5 100644 > --- a/libavfilter/x86/Makefile > +++ b/libavfilter/x86/Makefile > @@ -8,6 +8,7 @@ OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o > OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o > OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o > OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o > +OBJS-$(CONFIG_EXPOSURE_FILTER) += x86/vf_exposure_init.o > OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o > OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o > OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o > @@ -49,6 +50,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o > X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o > X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o > X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o > +X86ASM-OBJS-$(CONFIG_EXPOSURE_FILTER) += x86/vf_exposure.o > X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o > 
X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o > X86ASM-OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur.o > diff --git a/libavfilter/x86/vf_exposure.asm b/libavfilter/x86/vf_exposure.asm > new file mode 100644 > index 0000000000..3351c6fb3b > --- /dev/null > +++ b/libavfilter/x86/vf_exposure.asm > @@ -0,0 +1,55 @@ > +;***************************************************************************** > +;* x86-optimized functions for exposure filter > +;* > +;* This file is part of FFmpeg. > +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. 
> +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > +;****************************************************************************** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION .text > + > +;******************************************************************************* > +; void ff_exposure(float *ptr, int length, float black, float scale); > +;******************************************************************************* > +%macro EXPOSURE 0 > +cglobal exposure, 2, 2, 4, ptr, length, black, scale > + movsxdifnidn lengthq, lengthd > +%if WIN64 > + VBROADCASTSS m0, xmm2 > + VBROADCASTSS m1, xmm3 > +%else > + VBROADCASTSS m0, xmm0 > + VBROADCASTSS m1, xmm1 > +%endif > + > +.loop: > + movu m2, [ptrq] > + subps m2, m2, m0 > + mulps m2, m2, m1 > + movu [ptrq], m2 > + add ptrq, mmsize > + sub lengthq, mmsize/4 > + > + jg .loop > + > + RET > +%endmacro > + > +%if ARCH_X86_64 Why x86_64 only? > +INIT_XMM sse > +EXPOSURE Is it not possible to add an AVX version to process eight floats per loop? The function is already written in a way that you would only need to do %if HAVE_AVX_EXTERNAL INIT_YMM avx EXPOSURE %endif For it. And ptr alignment is not a problem seeing you're using unaligned movs. > +%endif > diff --git a/libavfilter/x86/vf_exposure_init.c b/libavfilter/x86/vf_exposure_init.c > new file mode 100644 > index 0000000000..de1b360f6c > --- /dev/null > +++ b/libavfilter/x86/vf_exposure_init.c > @@ -0,0 +1,36 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. 
> + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include "config.h" > + > +#include "libavutil/attributes.h" > +#include "libavutil/cpu.h" > +#include "libavutil/x86/cpu.h" > +#include "libavfilter/exposure.h" > + > +void ff_exposure_sse(float *ptr, int length, float black, float scale); > + > +av_cold void ff_exposure_init_x86(ExposureContext *s) > +{ > + int cpu_flags = av_get_cpu_flags(); > + > +#if ARCH_X86_64 > + if (EXTERNAL_SSE(cpu_flags)) > + s->exposure_func = ff_exposure_sse; > +#endif > +} >
On 11/20/2021 1:46 PM, James Almer wrote: > On 11/4/2021 1:18 AM, Wu Jianhua wrote: >> diff --git a/libavfilter/x86/vf_exposure.asm >> b/libavfilter/x86/vf_exposure.asm >> new file mode 100644 >> index 0000000000..3351c6fb3b >> --- /dev/null >> +++ b/libavfilter/x86/vf_exposure.asm >> @@ -0,0 +1,55 @@ >> +;***************************************************************************** >> >> +;* x86-optimized functions for exposure filter >> +;* >> +;* This file is part of FFmpeg. >> +;* >> +;* FFmpeg is free software; you can redistribute it and/or >> +;* modify it under the terms of the GNU Lesser General Public >> +;* License as published by the Free Software Foundation; either >> +;* version 2.1 of the License, or (at your option) any later version. >> +;* >> +;* FFmpeg is distributed in the hope that it will be useful, >> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of >> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> +;* Lesser General Public License for more details. 
>> +;* >> +;* You should have received a copy of the GNU Lesser General Public >> +;* License along with FFmpeg; if not, write to the Free Software >> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA >> 02110-1301 USA >> +;****************************************************************************** >> >> + >> +%include "libavutil/x86/x86util.asm" >> + >> +SECTION .text >> + >> +;******************************************************************************* >> >> +; void ff_exposure(float *ptr, int length, float black, float scale); >> +;******************************************************************************* >> >> +%macro EXPOSURE 0 >> +cglobal exposure, 2, 2, 4, ptr, length, black, scale >> + movsxdifnidn lengthq, lengthd >> +%if WIN64 >> + VBROADCASTSS m0, xmm2 >> + VBROADCASTSS m1, xmm3 >> +%else >> + VBROADCASTSS m0, xmm0 >> + VBROADCASTSS m1, xmm1 >> +%endif >> + >> +.loop: >> + movu m2, [ptrq] >> + subps m2, m2, m0 >> + mulps m2, m2, m1 >> + movu [ptrq], m2 >> + add ptrq, mmsize >> + sub lengthq, mmsize/4 >> + >> + jg .loop >> + >> + RET >> +%endmacro >> + >> +%if ARCH_X86_64 > > Why x86_64 only? > >> +INIT_XMM sse >> +EXPOSURE > > Is it not possible to add an AVX version to process eight floats per > loop? The function is already written in a way that you would only need > to do > > %if HAVE_AVX_EXTERNAL > INIT_YMM avx > EXPOSURE > %endif > > For it. And ptr alignment is not a problem seeing you're using unaligned > movs. Ignore this part. I need to remember to check entire patchsets before starting to send replies...
diff --git a/libavfilter/exposure.h b/libavfilter/exposure.h new file mode 100644 index 0000000000..e76a517826 --- /dev/null +++ b/libavfilter/exposure.h @@ -0,0 +1,36 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_EXPOSURE_H +#define AVFILTER_EXPOSURE_H +#include "avfilter.h" + +typedef struct ExposureContext { + const AVClass *class; + + float exposure; + float black; + float scale; + + void (*exposure_func)(float *ptr, int length, float black, float scale); +} ExposureContext; + +void ff_exposure_init(ExposureContext *s); +void ff_exposure_init_x86(ExposureContext *s); + +#endif diff --git a/libavfilter/vf_exposure.c b/libavfilter/vf_exposure.c index 108fba7930..045ae710d3 100644 --- a/libavfilter/vf_exposure.c +++ b/libavfilter/vf_exposure.c @@ -26,23 +26,20 @@ #include "formats.h" #include "internal.h" #include "video.h" +#include "exposure.h" -typedef struct ExposureContext { - const AVClass *class; - - float exposure; - float black; +static void exposure_c(float *ptr, int length, float black, float scale) +{ + int i; - float scale; - int (*do_slice)(AVFilterContext *s, void *arg, - int jobnr, int nb_jobs); -} ExposureContext; + for (i = 0; i < length; i++) + ptr[i] = (ptr[i] - black) * scale; +} static int 
exposure_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { ExposureContext *s = ctx->priv; AVFrame *frame = arg; - const int width = frame->width; const int height = frame->height; const int slice_start = (height * jobnr) / nb_jobs; const int slice_end = (height * (jobnr + 1)) / nb_jobs; @@ -52,24 +49,27 @@ static int exposure_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_job for (int p = 0; p < 3; p++) { const int linesize = frame->linesize[p] / 4; float *ptr = (float *)frame->data[p] + slice_start * linesize; - for (int y = slice_start; y < slice_end; y++) { - for (int x = 0; x < width; x++) - ptr[x] = (ptr[x] - black) * scale; - - ptr += linesize; - } + s->exposure_func(ptr, linesize * (slice_end - slice_start), black, scale); } return 0; } +void ff_exposure_init(ExposureContext *s) +{ + s->exposure_func = exposure_c; + + if (ARCH_X86) + ff_exposure_init_x86(s); +} + static int filter_frame(AVFilterLink *inlink, AVFrame *frame) { AVFilterContext *ctx = inlink->dst; ExposureContext *s = ctx->priv; s->scale = 1.f / (exp2f(-s->exposure) - s->black); - ff_filter_execute(ctx, s->do_slice, frame, NULL, + ff_filter_execute(ctx, exposure_slice, frame, NULL, FFMIN(frame->height, ff_filter_get_nb_threads(ctx))); return ff_filter_frame(ctx->outputs[0], frame); @@ -80,7 +80,7 @@ static av_cold int config_input(AVFilterLink *inlink) AVFilterContext *ctx = inlink->dst; ExposureContext *s = ctx->priv; - s->do_slice = exposure_slice; + ff_exposure_init(s); return 0; } diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index a29941eaeb..e84a388aa5 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -8,6 +8,7 @@ OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o +OBJS-$(CONFIG_EXPOSURE_FILTER) += x86/vf_exposure_init.o OBJS-$(CONFIG_FSPP_FILTER) += 
x86/vf_fspp_init.o OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o @@ -49,6 +50,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o +X86ASM-OBJS-$(CONFIG_EXPOSURE_FILTER) += x86/vf_exposure.o X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o X86ASM-OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur.o diff --git a/libavfilter/x86/vf_exposure.asm b/libavfilter/x86/vf_exposure.asm new file mode 100644 index 0000000000..3351c6fb3b --- /dev/null +++ b/libavfilter/x86/vf_exposure.asm @@ -0,0 +1,55 @@ +;***************************************************************************** +;* x86-optimized functions for exposure filter +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +;******************************************************************************* +; void ff_exposure(float *ptr, int length, float black, float scale); +;******************************************************************************* +%macro EXPOSURE 0 +cglobal exposure, 2, 2, 4, ptr, length, black, scale + movsxdifnidn lengthq, lengthd +%if WIN64 + VBROADCASTSS m0, xmm2 + VBROADCASTSS m1, xmm3 +%else + VBROADCASTSS m0, xmm0 + VBROADCASTSS m1, xmm1 +%endif + +.loop: + movu m2, [ptrq] + subps m2, m2, m0 + mulps m2, m2, m1 + movu [ptrq], m2 + add ptrq, mmsize + sub lengthq, mmsize/4 + + jg .loop + + RET +%endmacro + +%if ARCH_X86_64 +INIT_XMM sse +EXPOSURE +%endif diff --git a/libavfilter/x86/vf_exposure_init.c b/libavfilter/x86/vf_exposure_init.c new file mode 100644 index 0000000000..de1b360f6c --- /dev/null +++ b/libavfilter/x86/vf_exposure_init.c @@ -0,0 +1,36 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavfilter/exposure.h" + +void ff_exposure_sse(float *ptr, int length, float black, float scale); + +av_cold void ff_exposure_init_x86(ExposureContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + +#if ARCH_X86_64 + if (EXTERNAL_SSE(cpu_flags)) + s->exposure_func = ff_exposure_sse; +#endif +}
Performance data (lower is better): exposure_c: 857394 exposure_sse: 327589 Signed-off-by: Wu Jianhua <jianhua.wu@intel.com> --- libavfilter/exposure.h | 36 +++++++++++++++++++ libavfilter/vf_exposure.c | 36 +++++++++---------- libavfilter/x86/Makefile | 2 ++ libavfilter/x86/vf_exposure.asm | 55 ++++++++++++++++++++++++++++++ libavfilter/x86/vf_exposure_init.c | 36 +++++++++++++++++++ 5 files changed, 147 insertions(+), 18 deletions(-) create mode 100644 libavfilter/exposure.h create mode 100644 libavfilter/x86/vf_exposure.asm create mode 100644 libavfilter/x86/vf_exposure_init.c