[FFmpeg-devel] avfilter: add anlmdn filter x86 SIMD optimizations

Submitted by Paul B Mahol on Jan. 9, 2019, 7:07 p.m.

Details

Message ID 20190109190744.20034-1-onemda@gmail.com
State Accepted
Headers show

Commit Message

Paul B Mahol Jan. 9, 2019, 7:07 p.m.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavfilter/af_anlmdn.c          | 38 +++++++++++----
 libavfilter/af_anlmdndsp.h       | 40 ++++++++++++++++
 libavfilter/x86/Makefile         |  2 +
 libavfilter/x86/af_anlmdn.asm    | 80 ++++++++++++++++++++++++++++++++
 libavfilter/x86/af_anlmdn_init.c | 35 ++++++++++++++
 5 files changed, 185 insertions(+), 10 deletions(-)
 create mode 100644 libavfilter/af_anlmdndsp.h
 create mode 100644 libavfilter/x86/af_anlmdn.asm
 create mode 100644 libavfilter/x86/af_anlmdn_init.c

Patch hide | download patch | download mbox

diff --git a/libavfilter/af_anlmdn.c b/libavfilter/af_anlmdn.c
index 62931e37cc..9ef0bf7239 100644
--- a/libavfilter/af_anlmdn.c
+++ b/libavfilter/af_anlmdn.c
@@ -27,6 +27,8 @@ 
 #include "audio.h"
 #include "formats.h"
 
+#include "af_anlmdndsp.h"
+
 #define SQR(x) ((x) * (x))
 
 typedef struct AudioNLMeansContext {
@@ -49,7 +51,7 @@  typedef struct AudioNLMeansContext {
 
     AVAudioFifo *fifo;
 
-    float (*compute_distance)(const float *f1, const float *f2, int K);
+    AudioNLMDNDSPContext dsp;
 } AudioNLMeansContext;
 
 #define OFFSET(x) offsetof(AudioNLMeansContext, x)
@@ -93,7 +95,7 @@  static int query_formats(AVFilterContext *ctx)
     return ff_set_common_samplerates(ctx, formats);
 }
 
-static float compute_distance_ssd(const float *f1, const float *f2, int K)
+static float compute_distance_ssd_c(const float *f1, const float *f2, ptrdiff_t K)
 {
     float distance = 0.;
 
@@ -103,6 +105,25 @@  static float compute_distance_ssd(const float *f1, const float *f2, int K)
     return distance;
 }
 
+static void compute_cache_c(float *cache, const float *f,
+                            ptrdiff_t S, ptrdiff_t K,
+                            ptrdiff_t i, ptrdiff_t jj)
+{
+    int v = 0;
+
+    for (int j = jj; j < jj + S; j++, v++)
+        cache[v] += -SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]);
+}
+
+void ff_anlmdn_init(AudioNLMDNDSPContext *dsp)
+{
+    dsp->compute_distance_ssd = compute_distance_ssd_c;
+    dsp->compute_cache        = compute_cache_c;
+
+    if (ARCH_X86)
+        ff_anlmdn_init_x86(dsp);
+}
+
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -129,7 +150,7 @@  static int config_output(AVFilterLink *outlink)
     if (!s->fifo)
         return AVERROR(ENOMEM);
 
-    s->compute_distance = compute_distance_ssd;
+    ff_anlmdn_init(&s->dsp);
 
     return 0;
 }
@@ -153,17 +174,14 @@  static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
             for (int j = i - S; j <= i + S; j++) {
                 if (i == j)
                     continue;
-                cache[v++] = s->compute_distance(f + i, f + j, K);
+                cache[v++] = s->dsp.compute_distance_ssd(f + i, f + j, K);
             }
         } else {
-            for (int j = i - S; j < i; j++, v++)
-                cache[v] = cache[v] - SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]);
-
-            for (int j = i + 1; j <= i + S; j++, v++)
-                cache[v] = cache[v] - SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]);
+            s->dsp.compute_cache(cache, f, S, K, i, i - S);
+            s->dsp.compute_cache(cache + S, f, S, K, i, i + 1);
         }
 
-        for (int j = 0; j < v; j++) {
+        for (int j = 0; j < 2 * S; j++) {
             const float distance = cache[j];
             float w;
 
diff --git a/libavfilter/af_anlmdndsp.h b/libavfilter/af_anlmdndsp.h
new file mode 100644
index 0000000000..d8f5136cd8
--- /dev/null
+++ b/libavfilter/af_anlmdndsp.h
@@ -0,0 +1,40 @@ 
+/*
+ * Copyright (c) 2019 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_ANLMDNDSP_H
+#define AVFILTER_ANLMDNDSP_H
+
+#include "libavutil/common.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+
+typedef struct AudioNLMDNDSPContext {
+    float (*compute_distance_ssd)(const float *f1, const float *f2, ptrdiff_t K);
+    void (*compute_cache)(float *cache, const float *f, ptrdiff_t S, ptrdiff_t K,
+                          ptrdiff_t i, ptrdiff_t jj);
+} AudioNLMDNDSPContext;
+
+void ff_anlmdn_init(AudioNLMDNDSPContext *s);
+void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s);
+
+#endif /* AVFILTER_ANLMDNDSP_H */
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 6eecb94359..17499f14da 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,6 +1,7 @@ 
 OBJS-$(CONFIG_SCENE_SAD)                     += x86/scene_sad_init.o
 
 OBJS-$(CONFIG_AFIR_FILTER)                   += x86/af_afir_init.o
+OBJS-$(CONFIG_ANLMDN_FILTER)                 += x86/af_anlmdn_init.o
 OBJS-$(CONFIG_BLEND_FILTER)                  += x86/vf_blend_init.o
 OBJS-$(CONFIG_BWDIF_FILTER)                  += x86/vf_bwdif_init.o
 OBJS-$(CONFIG_COLORSPACE_FILTER)             += x86/colorspacedsp_init.o
@@ -34,6 +35,7 @@  OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o
 X86ASM-OBJS-$(CONFIG_SCENE_SAD)              += x86/scene_sad.o
 
 X86ASM-OBJS-$(CONFIG_AFIR_FILTER)            += x86/af_afir.o
+X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER)          += x86/af_anlmdn.o
 X86ASM-OBJS-$(CONFIG_BLEND_FILTER)           += x86/vf_blend.o
 X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)           += x86/vf_bwdif.o
 X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)      += x86/colorspacedsp.o
diff --git a/libavfilter/x86/af_anlmdn.asm b/libavfilter/x86/af_anlmdn.asm
new file mode 100644
index 0000000000..9630f4771c
--- /dev/null
+++ b/libavfilter/x86/af_anlmdn.asm
@@ -0,0 +1,80 @@ 
+;*****************************************************************************
+;* x86-optimized functions for anlmdn filter
+;* Copyright (c) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+;------------------------------------------------------------------------------
+; float ff_compute_distance_ssd(const float *f1, const float *f2, ptrdiff_t len)
+;------------------------------------------------------------------------------
+
+INIT_XMM sse
+cglobal compute_distance_ssd, 3,5,3, f1, f2, len, r, x
+    mov       xq, lenq
+    shl       xq, 2
+    neg       xq
+    add       f1q, xq
+    add       f2q, xq
+    xor       xq, xq
+    shl       lend, 1
+    add       lend, 1
+    shl       lend, 2
+    mov       rq, lenq
+    and       rq, mmsize - 1
+    xorps     m0, m0
+    cmp       lenq, mmsize
+    jl .loop1
+    sub       lenq, rq
+ALIGN 16
+    .loop0:
+        movups    m1, [f1q + xq]
+        movups    m2, [f2q + xq]
+        subps     m1, m2
+        mulps     m1, m1
+        addps     m0, m1
+        add       xq, mmsize
+        cmp       xq, lenq
+        jl .loop0
+
+    movhlps   xmm1, xmm0
+    addps     xmm0, xmm1
+    movss     xmm1, xmm0
+    shufps    xmm0, xmm0, 1
+    addss     xmm0, xmm1
+
+    cmp       rq, 0
+    je .end
+    add       lenq, rq
+    .loop1:
+        movss    xm1, [f1q + xq]
+        subss    xm1, [f2q + xq]
+        mulss    xm1, xm1
+        addss    xm0, xm1
+        add       xq, 4
+        cmp       xq, lenq
+        jl .loop1
+    .end:
+%if ARCH_X86_64 == 0
+    movss     r0m, xm0
+    fld dword r0m
+%endif
+    RET
diff --git a/libavfilter/x86/af_anlmdn_init.c b/libavfilter/x86/af_anlmdn_init.c
new file mode 100644
index 0000000000..30eff6f644
--- /dev/null
+++ b/libavfilter/x86/af_anlmdn_init.c
@@ -0,0 +1,35 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/af_anlmdndsp.h"
+
+float ff_compute_distance_ssd_sse(const float *f1, const float *f2,
+                                  ptrdiff_t len);
+
+av_cold void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE(cpu_flags)) {
+        s->compute_distance_ssd = ff_compute_distance_ssd_sse;
+    }
+}