diff mbox series

[FFmpeg-devel,09/11] checkasm: add av_tx FFT SIMD testing code

Message ID MYfnt-I--7-2@lynne.ee
State New
Headers show
Series lavu/tx: FFT improvements, additions and assembly
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Lynne April 19, 2021, 8:25 p.m. UTC
This sadly required making changes to the transform code itself,
because checkasm reuses the same context for both the C and the SIMD versions.
The lookup table (revtab) therefore had to be duplicated, one copy per version.

Patch attached.
Subject: [PATCH 09/11] checkasm: add av_tx FFT SIMD testing code

This sadly required making changes to the transform code itself,
because checkasm reuses the same context for both the C and the SIMD versions.
The lookup table (revtab) therefore had to be duplicated, one copy per version.
---
 libavutil/tx.c            |  15 +++---
 libavutil/tx_priv.h       |   5 +-
 libavutil/tx_template.c   |  18 +++----
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/av_tx.c    | 109 ++++++++++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.c |   1 +
 tests/checkasm/checkasm.h |   1 +
 tests/fate/checkasm.mak   |   1 +
 8 files changed, 135 insertions(+), 16 deletions(-)
 create mode 100644 tests/checkasm/av_tx.c
diff mbox series

Patch

diff --git a/libavutil/tx.c b/libavutil/tx.c
index 6d0e854084..dcfb257899 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -106,22 +106,24 @@  int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
 {
     const int m = s->m, inv = s->inv;
 
-    if (!(s->revtab = av_malloc(m*sizeof(*s->revtab))))
+    if (!(s->revtab = av_malloc(s->m*sizeof(*s->revtab))))
+        return AVERROR(ENOMEM);
+    if (!(s->revtab_c = av_malloc(m*sizeof(*s->revtab_c)))) /* copy read by the C code */
         return AVERROR(ENOMEM);
 
     /* Default */
     for (int i = 0; i < m; i++) {
         int k = -split_radix_permutation(i, m, inv) & (m - 1);
         if (invert_lookup)
-            s->revtab[i] = k;
+            s->revtab[i] = s->revtab_c[i] = k; /* i -> k in both tables */
         else
-            s->revtab[k] = i;
+            s->revtab[k] = s->revtab_c[k] = i; /* k -> i in both tables */
     }
 
     return 0;
 }
 
-int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
+int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab)
 {
     int nb_inplace_idx = 0;
 
@@ -130,7 +132,7 @@  int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
 
     /* The first coefficient is always already in-place */
     for (int src = 1; src < s->m; src++) {
-        int dst = s->revtab[src];
+        int dst = revtab[src];
         int found = 0;
 
         if (dst <= src)
@@ -146,7 +148,7 @@  int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
                     break;
                 }
             }
-            dst = s->revtab[dst];
+            dst = revtab[dst];
         } while (dst != src && !found);
 
         if (!found)
@@ -215,6 +217,7 @@  av_cold void av_tx_uninit(AVTXContext **ctx)
     av_free((*ctx)->pfatab);
     av_free((*ctx)->exptab);
     av_free((*ctx)->revtab);
+    av_free((*ctx)->revtab_c);
     av_free((*ctx)->inplace_idx);
     av_free((*ctx)->tmp);
 
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index b889f6d3b4..88589fcbb4 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -122,6 +122,9 @@  struct AVTXContext {
     int        *revtab; /* Input mapping for power of two transforms */
     int   *inplace_idx; /* Required indices to revtab for in-place transforms */
 
+    int      *revtab_c; /* Revtab for only the C transforms, needed because
+                         * checkasm makes us reuse the same context. */
+
     av_tx_fn    top_tx; /* Used for computing transforms derived from other
                          * transforms, like full-length iMDCTs and RDFTs.
                          * NOTE: Do NOT use this to mix assembly with C code. */
@@ -147,7 +150,7 @@  int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup);
  * specific order,  allows the revtab to be done in-place. AVTXContext->revtab
  * must already exist.
  */
-int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s);
+int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab);
 
 /*
  * This generates a parity-based revtab of length len and direction inv.
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index a68a84dcd5..cad66a8bc0 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -593,7 +593,7 @@  static void compound_fft_##N##xM(AVTXContext *s, void *_out,                   \
     for (int i = 0; i < m; i++) {                                              \
         for (int j = 0; j < N; j++)                                            \
             fft##N##in[j] = in[in_map[i*N + j]];                               \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -624,16 +624,16 @@  static void split_radix_fft(AVTXContext *s, void *_out, void *_in,
 
         do {
             tmp = out[src];
-            dst = s->revtab[src];
+            dst = s->revtab_c[src];
             do {
                 FFSWAP(FFTComplex, tmp, out[dst]);
-                dst = s->revtab[dst];
+                dst = s->revtab_c[dst];
             } while (dst != src); /* Can be > as well, but is less predictable */
             out[dst] = tmp;
         } while ((src = *inplace_idx++));
     } else {
         for (int i = 0; i < m; i++)
-            out[i] = in[s->revtab[i]];
+            out[i] = in[s->revtab_c[i]];
     }
 
     fft_dispatch[mb](out);
@@ -685,7 +685,7 @@  static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src,     \
             FFTComplex tmp = { in2[-k*stride], in1[k*stride] };                \
             CMUL3(fft##N##in[j], tmp, exp[k >> 1]);                            \
         }                                                                      \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -733,7 +733,7 @@  static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src,      \
             CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im,           \
                  exp[k >> 1].re, exp[k >> 1].im);                              \
         }                                                                      \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -772,7 +772,7 @@  static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
 
     for (int i = 0; i < m; i++) {
         FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
-        CMUL3(z[s->revtab[i]], tmp, exp[i]);
+        CMUL3(z[s->revtab_c[i]], tmp, exp[i]);
     }
 
     fftp(z);
@@ -806,7 +806,7 @@  static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
             tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
             tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
         }
-        CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
+        CMUL(z[s->revtab_c[i]].im, z[s->revtab_c[i]].re, tmp.re, tmp.im,
              exp[i].re, exp[i].im);
     }
 
@@ -1005,7 +1005,7 @@  int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
         if (flags & AV_TX_INPLACE) {
             if (is_mdct) /* In-place MDCTs are not supported yet */
                 return AVERROR(ENOSYS);
-            if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s)))
+            if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s, s->revtab_c)))
                 return err;
         }
         for (int i = 4; i <= av_log2(m); i++)
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 1827a4e134..4ef5fa87da 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -50,6 +50,7 @@  SWSCALEOBJS                             += sw_rgb.o sw_scale.o
 CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
 
 # libavutil tests
+AVUTILOBJS                              += av_tx.o
 AVUTILOBJS                              += fixed_dsp.o
 AVUTILOBJS                              += float_dsp.o
 
diff --git a/tests/checkasm/av_tx.c b/tests/checkasm/av_tx.c
new file mode 100644
index 0000000000..6ffbce2b4a
--- /dev/null
+++ b/tests/checkasm/av_tx.c
@@ -0,0 +1,109 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavutil/mem_internal.h"
+#include "libavutil/tx.h"
+#include "libavutil/error.h"
+
+#include "checkasm.h"
+
+#define EPS 0.0001 /* tolerance handed to the *_near_abs_eps_array() checks */
+/* SCALE arguments for randomize_complex(); SCALE_INT20 is not used in this file. */
+#define SCALE_NOOP(x) (x)
+#define SCALE_INT20(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX) >> 12)
+
+#define randomize_complex(BUF, LEN, TYPE, SCALE)                \
+    do { /* fills LEN entries of BUF, viewed as TYPE[] */       \
+        TYPE *buf = (TYPE *)(BUF);                              \
+        for (int i = 0; i < (LEN); i++) {                       \
+            double fre = (double)rnd() / UINT_MAX;              \
+            double fim = (double)rnd() / UINT_MAX;              \
+            buf[i] = (TYPE){ SCALE(fre), SCALE(fim) };          \
+        }                                                       \
+    } while (0)
+
+static const int check_lens[] = { /* lengths iterated over by CHECK_TEMPLATE */
+    2, 4, 8, 16, 32, 64, 1024, 16384, /* powers of two */
+    3*2, 5*2, 7*2, 9*2, 15*2,         /* an odd factor times two */
+};
+
+/* Runs call_ref()/call_new() for every length in LENGTHS and calls fail() when
+ * CHECK_EXPRESSION is true; reads in, out_ref and out_new from the caller's scope. */
+#define CHECK_TEMPLATE(PREFIX, TYPE, DATA_TYPE, SCALE, LENGTHS, CHECK_EXPRESSION) \
+    do {                                                                          \
+        int err;                                                                  \
+        AVTXContext *tx = NULL;                                                   \
+        av_tx_fn fn = NULL;                                                       \
+        int num_checks = 0;                                                       \
+        int last_check = 0;                                                       \
+                                                                                  \
+        for (int i = 0; i < FF_ARRAY_ELEMS(LENGTHS); i++) {                       \
+            int len = LENGTHS[i];                                                 \
+            /* scale must point at the scale value itself, hence &SCALE */        \
+            if ((err = av_tx_init(&tx, &fn, TYPE, 0, len, &SCALE, 0x0)) < 0) {    \
+                fprintf(stderr, "av_tx: %s\n", av_err2str(err));                  \
+                return;                                                           \
+            }                                                                     \
+                                                                                  \
+            if (check_func(fn, PREFIX "_%i", len)) {                              \
+                num_checks++;                                                     \
+                last_check = len;                                                 \
+                call_ref(tx, out_ref, in, sizeof(DATA_TYPE));                     \
+                call_new(tx, out_new, in, sizeof(DATA_TYPE));                     \
+                if (CHECK_EXPRESSION) {                                           \
+                    fail();                                                       \
+                    break;                                                        \
+                }                                                                 \
+                bench_new(tx, out_new, in, sizeof(DATA_TYPE));                    \
+            }                                                                     \
+                                                                                  \
+            av_tx_uninit(&tx);                                                    \
+            fn = NULL;                                                            \
+        }                                                                         \
+        /* Covers the break above, which skips the in-loop av_tx_uninit() */      \
+        av_tx_uninit(&tx);                                                        \
+                                                                                  \
+        if (num_checks == 1)                                                      \
+            report(PREFIX "_%i", last_check);                                     \
+        else if (num_checks)                                                      \
+            report(PREFIX);                                                       \
+    } while (0)
+
+/* checkasm entry point: runs CHECK_TEMPLATE on the float and the double FFT. */
+void checkasm_check_av_tx(void)
+{
+    const float scale_float = 1.0f;
+    const double scale_double = 1.0;
+    declare_func(void, AVTXContext *tx, void *out, void *in, ptrdiff_t stride);
+    void *in      = av_malloc(16384*2*8); /* 16384 entries of 2*8 bytes each */
+    void *out_ref = av_malloc(16384*2*8);
+    void *out_new = av_malloc(16384*2*8);
+    if (!in || !out_ref || !out_new) { /* never hand NULL buffers to the tests */
+        fprintf(stderr, "av_tx: %s\n", av_err2str(AVERROR(ENOMEM)));
+    } else {
+        randomize_complex(in, 16384, AVComplexFloat, SCALE_NOOP);
+        CHECK_TEMPLATE("float_fft", AV_TX_FLOAT_FFT, AVComplexFloat, scale_float, check_lens,
+                       !float_near_abs_eps_array(out_ref, out_new, EPS, len*2));
+        randomize_complex(in, 16384, AVComplexDouble, SCALE_NOOP);
+        CHECK_TEMPLATE("double_fft", AV_TX_DOUBLE_FFT, AVComplexDouble, scale_double, check_lens,
+                       !double_near_abs_eps_array(out_ref, out_new, EPS, len*2));
+    }
+    av_free(in);
+    av_free(out_ref);
+    av_free(out_new);
+}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 8338e8ff58..e2e17d2b11 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -198,6 +198,7 @@  static const struct {
 #if CONFIG_AVUTIL
         { "fixed_dsp", checkasm_check_fixed_dsp },
         { "float_dsp", checkasm_check_float_dsp },
+        { "av_tx",     checkasm_check_av_tx },
 #endif
     { NULL }
 };
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index ef6645e3a2..0593d0edac 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -43,6 +43,7 @@  void checkasm_check_aacpsdsp(void);
 void checkasm_check_afir(void);
 void checkasm_check_alacdsp(void);
 void checkasm_check_audiodsp(void);
+void checkasm_check_av_tx(void);
 void checkasm_check_blend(void);
 void checkasm_check_blockdsp(void);
 void checkasm_check_bswapdsp(void);
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 07f1d8238e..3108fcd510 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -2,6 +2,7 @@  FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
                 fate-checkasm-af_afir                                   \
                 fate-checkasm-alacdsp                                   \
                 fate-checkasm-audiodsp                                  \
+                fate-checkasm-av_tx                                     \
                 fate-checkasm-blockdsp                                  \
                 fate-checkasm-bswapdsp                                  \
                 fate-checkasm-exrdsp                                    \