new file mode 100644
@@ -0,0 +1,36 @@
+ /*
+ * Copyright (c) 2019 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_ATADENOISE_H
+#define AVFILTER_ATADENOISE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct ATADenoiseDSPContext { /* function table for the per-row filter; arch init code may overwrite its entries */
+ void (*filter_row)(const uint8_t *src, uint8_t *dst, /* filter_slice calls this once per row, then advances src/dst by their linesize */
+ const uint8_t **srcf, /* array of row pointers, one per queued frame (filter_slice fills it from data[i] + slice_start * linesize[i]) */
+ int w, int mid, int size, /* w: row length walked by x; mid: srcf index the search starts from; size: upper bound for srcf indices */
+ int thra, int thrb); /* thresholds; the x86 version stops a pixel when one abs diff > thra or the running diff sum > thrb */
+} ATADenoiseDSPContext;
+
+void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth); /* x86 hook: may replace dsp->filter_row for the given component depth; config_input calls it when ARCH_X86 is set */
+
+#endif /* AVFILTER_ATADENOISE_H */
@@ -33,6 +33,7 @@
#define FF_BUFQUEUE_SIZE 129
#include "bufferqueue.h"
+#include "atadenoise.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
@@ -57,10 +58,8 @@ typedef struct ATADenoiseContext {
int available;
int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
- void (*filter_row)(const uint8_t *src, uint8_t *dst,
- const uint8_t *srcf[SIZE],
- int w, int mid, int size,
- int thra, int thrb);
+
+ ATADenoiseDSPContext dsp;
} ATADenoiseContext;
#define OFFSET(x) offsetof(ATADenoiseContext, x)
@@ -209,7 +208,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
srcf[i] = data[i] + slice_start * linesize[i];
for (y = slice_start; y < slice_end; y++) {
- s->filter_row(src, dst, srcf, w, mid, size, thra, thrb);
+ s->dsp.filter_row(src, dst, srcf, w, mid, size, thra, thrb);
dst += out->linesize[p];
src += in->linesize[p];
@@ -239,9 +238,9 @@ static int config_input(AVFilterLink *inlink)
depth = desc->comp[0].depth;
s->filter_slice = filter_slice;
if (depth == 8)
- s->filter_row = filter_row8;
+ s->dsp.filter_row = filter_row8;
else
- s->filter_row = filter_row16;
+ s->dsp.filter_row = filter_row16;
s->thra[0] = s->fthra[0] * (1 << depth) - 1;
s->thra[1] = s->fthra[1] * (1 << depth) - 1;
@@ -250,6 +249,9 @@ static int config_input(AVFilterLink *inlink)
s->thrb[1] = s->fthrb[1] * (1 << depth) - 1;
s->thrb[2] = s->fthrb[2] * (1 << depth) - 1;
+ if (ARCH_X86)
+ ff_atadenoise_init_x86(&s->dsp, depth);
+
return 0;
}
@@ -2,6 +2,7 @@ OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o
OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o
OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o
+OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise_init.o
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
@@ -39,6 +40,7 @@ X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o
X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o
X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o
+X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise.o
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
new file mode 100644
@@ -0,0 +1,150 @@
+;*****************************************************************************
+;* x86-optimized functions for atadenoise filter
+;*
+;* Copyright (C) 2019 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%if ARCH_X86_64
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+pw_one: times 8 dw 1
+pw_ones: times 8 dw 65535
+
+SECTION .text
+
+;------------------------------------------------------------------------------
+; void ff_atadenoise_filter_row8(const uint8_t *src, uint8_t *dst,
+; const uint8_t **srcf,
+; int w, int mid, int size,
+; int thra, int thrb)
+;------------------------------------------------------------------------------
+
+INIT_XMM sse4
+cglobal atadenoise_filter_row8, 8,10,13, src, dst, srcf, w, mid, size, i, j, srcfx, x  ; 8 args, 10 GPRs, 13 XMM regs; arg slots 6/7 (thra/thrb) are named i/j and reused as loop indices
+ movsxdifnidn wq, wd  ; sign-extend the int arguments to pointer width
+ movsxdifnidn midq, midd
+ movsxdifnidn sizeq, sized
+ add srcq, wq  ; point src/dst at the end of the row ...
+ add dstq, wq
+ mov xq, wq
+ dec sizeq  ; size - 1: bound that i is compared against at the bottom of .loop0
+ neg xq  ; ... and index with x running from -w up towards 0
+ movd m4, r6m  ; thra, read from the argument slot before i overwrites the register
+ SPLATW m4, m4  ; broadcast thra to every word lane
+ movd m5, r7m  ; thrb
+ SPLATW m5, m5  ; broadcast thrb to every word lane
+ pxor m2, m2  ; m2 = 0, used for byte->word and word->dword unpacking
+ mova m10, [pw_ones]  ; m10 = all-ones words, used to invert comparison masks
+
+ .loop:  ; one pass per 8 pixels
+ mov iq, midq  ; i walks upwards from mid
+ mov jq, midq  ; j walks downwards from mid
+ pxor m3, m3  ; m3 = running sum of abs diffs on the i side
+ pxor m11, m11  ; m11 = running sum of abs diffs on the j side
+ movu m0, [srcq + xq]  ; NOTE(review): loads 16 bytes although only the low 8 are used below - confirm rows are readable past x + 8
+ punpcklbw m0, m2  ; widen the low 8 pixels to words
+ mova m7, m0  ; m7 = per-lane pixel sum, seeded with the src pixels
+ mova m8, [pw_one]  ; m8 = per-lane sample count, starts at 1 for the src pixel
+ mova m12, [pw_ones]  ; m12 = per-lane "still accumulating" mask, all lanes active
+
+ .loop0:  ; body runs before any bound check, so srcf[mid - 1] and srcf[mid + 1] are always read
+ inc iq
+ dec jq
+
+ mov srcfxq, [srcfq + jq * 8]  ; srcf[j] (8-byte pointers, hence the ARCH_X86_64 guard around this file)
+ add srcfxq, wq  ; same end-of-row bias as src
+
+ movu m1, [srcfxq + xq]
+ punpcklbw m1, m2  ; widen srcf[j] pixels to words
+ mova m9, m1  ; keep the pixel values for the sum
+ psubw m1, m0
+ pabsw m1, m1  ; m1 = |srcf[j][x] - src[x]|
+ paddw m11, m1  ; accumulate the j-side diff sum
+ pcmpgtw m1, m4  ; lanes where diff > thra
+ mova m6, m11
+ pcmpgtw m6, m5  ; lanes where diff sum > thrb
+ por m6, m1  ; lanes that exceeded either threshold
+ pxor m6, m10  ; invert: lanes that passed both tests
+ pand m12, m6  ; a lane that fails once stays cleared for the rest of .loop0
+ pand m9, m12  ; zero the pixels of stopped lanes
+ paddw m7, m9  ; add accepted pixels to the sum
+ mova m6, m12
+ psrlw m6, 15  ; turn the 0xffff/0 mask into 1/0
+ paddw m8, m6  ; count accepted pixels
+
+ mov srcfxq, [srcfq + iq * 8]  ; srcf[i]: same sequence as above for the i side, sharing mask m12
+ add srcfxq, wq
+
+ movu m1, [srcfxq + xq]
+ punpcklbw m1, m2
+ mova m9, m1
+ psubw m1, m0
+ pabsw m1, m1  ; m1 = |srcf[i][x] - src[x]|
+ paddw m3, m1  ; accumulate the i-side diff sum
+ pcmpgtw m1, m4
+ mova m6, m3
+ pcmpgtw m6, m5
+ por m6, m1
+ pxor m6, m10
+ pand m12, m6
+ pand m9, m12
+ paddw m7, m9
+ mova m6, m12
+ psrlw m6, 15
+ paddw m8, m6
+
+ ptest m12, m12  ; ZF is set when every lane has stopped
+ jz .finish  ; nothing left to accumulate for these 8 pixels
+
+ cmp iq, sizeq
+ jl .loop0  ; continue while i < size - 1, i.e. while the next i is still < size
+
+ .finish:
+ mova m1, m7  ; copies of sum and count for the high 4 lanes
+ mova m6, m8
+
+ punpcklwd m7, m2  ; low 4 sums -> dwords
+ punpcklwd m8, m2  ; low 4 counts -> dwords
+ cvtdq2ps m7, m7
+ cvtdq2ps m8, m8
+ divps m7, m8  ; sum / count in float
+ cvttps2dq m7, m7  ; truncate back to integer
+ packssdw m7, m7
+ packuswb m7, m7  ; narrow dwords -> words -> bytes
+
+ movd [dstq + xq], m7  ; store pixels 0..3
+
+ punpckhwd m1, m2  ; same division for the high 4 lanes
+ punpckhwd m6, m2
+ cvtdq2ps m1, m1
+ cvtdq2ps m6, m6
+ divps m1, m6
+ cvttps2dq m1, m1
+ packssdw m1, m1
+ packuswb m1, m1
+
+ movd [dstq + xq + 4], m1  ; store pixels 4..7
+
+ add xq, mmsize/2  ; advance 8 pixels; NOTE(review): 8 bytes are stored even if fewer than 8 remain - confirm dst rows are padded
+ jl .loop  ; loop until x reaches 0 (end of row)
+ RET
+
+%endif
new file mode 100644
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2019 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/atadenoise.h"
+
+void ff_atadenoise_filter_row8_sse4(const uint8_t *src, uint8_t *dst, /* asm symbol emitted by "cglobal atadenoise_filter_row8" under INIT_XMM sse4 */
+ const uint8_t **srcf, /* same parameter list as ATADenoiseDSPContext.filter_row, so it can be assigned to it directly */
+ int w, int mid, int size,
+ int thra, int thrb);
+
+av_cold void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth) /* installs x86 row filters into dsp when the CPU supports them; otherwise dsp is left untouched */
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8) { /* the asm body is only assembled under %if ARCH_X86_64 and unpacks samples as bytes, hence the depth limit */
+ dsp->filter_row = ff_atadenoise_filter_row8_sse4;
+ }
+}
Signed-off-by: Paul B Mahol <onemda@gmail.com> --- libavfilter/atadenoise.h | 36 +++++++ libavfilter/vf_atadenoise.c | 16 +-- libavfilter/x86/Makefile | 2 + libavfilter/x86/vf_atadenoise.asm | 150 +++++++++++++++++++++++++++ libavfilter/x86/vf_atadenoise_init.c | 40 +++++++ 5 files changed, 237 insertions(+), 7 deletions(-) create mode 100644 libavfilter/atadenoise.h create mode 100644 libavfilter/x86/vf_atadenoise.asm create mode 100644 libavfilter/x86/vf_atadenoise_init.c