diff mbox series

[FFmpeg-devel] checkasm: add lossless audio DSP

Message ID 20231112194553.23248-1-remi@remlab.net
State Accepted
Commit 6720a509a77f1a077c4716f331049259cb242830
Headers show
Series [FFmpeg-devel] checkasm: add lossless audio DSP | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Nov. 12, 2023, 7:45 p.m. UTC
---
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/llauddsp.c | 115 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 120 insertions(+)
 create mode 100644 tests/checkasm/llauddsp.c

Comments

Rémi Denis-Courmont Nov. 13, 2023, 6:41 a.m. UTC | #1
Hi,

This seems to show that the SSSE3 optimisation is no better than the SSE2, at least on my AMD Ryzen. Does anyone know why it's there? Should it be purged?

Br,
Paul B Mahol Nov. 13, 2023, 9:07 a.m. UTC | #2
On Mon, Nov 13, 2023 at 7:42 AM Rémi Denis-Courmont <remi@remlab.net> wrote:

> Hi,
>
> This seems to show that the SSSE3 optimisation is no better than the SSE2,
> at least on my AMD Ryzen. Does anyone know why it's there? Should it be
> purged?
>
>
Not everybody have/use AMD Ryzen.


> Br,
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Rémi Denis-Courmont Nov. 13, 2023, 9:17 a.m. UTC | #3
Le 13 novembre 2023 11:07:21 GMT+02:00, Paul B Mahol <onemda@gmail.com> a écrit :
>On Mon, Nov 13, 2023 at 7:42 AM Rémi Denis-Courmont <remi@remlab.net> wrote:
>
>> Hi,
>>
>> This seems to show that the SSSE3 optimisation is no better than the SSE2,
>> at least on my AMD Ryzen. Does anyone know why it's there? Should it be
>> purged?
>>
>>
>Not everybody have/use AMD Ryzen.

That's not really answering the questions. Instead it just sounds passive-aggressive TBH. Well, somebody else did answer them on IRC anyway.
Rémi Denis-Courmont Nov. 13, 2023, 3:38 p.m. UTC | #4
Le maanantaina 13. marraskuuta 2023, 11.17.57 EET Rémi Denis-Courmont a écrit :
> Le 13 novembre 2023 11:07:21 GMT+02:00, Paul B Mahol <onemda@gmail.com> a écrit :
> >On Mon, Nov 13, 2023 at 7:42 AM Rémi Denis-Courmont <remi@remlab.net> wrote:
> >> Hi,
> >> 
> >> This seems to show that the SSSE3 optimisation is no better than the
> >> SSE2,
> >> at least on my AMD Ryzen. Does anyone know why it's there? Should it be
> >> purged?
> >
> >Not everybody have/use AMD Ryzen.
> 
> That's not really answering the questions. Instead it just sounds
> passive-aggressive TBH. Well, somebody else did answer them on IRC anyway.

It has become apparent that this message of mine has been detrimentally
over-interpreted. I am sorry that it came out that way.
diff mbox series

Patch

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 594db4df9d..8bc241d29b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -10,6 +10,7 @@  AVCODECOBJS-$(CONFIG_H264DSP)           += h264dsp.o
 AVCODECOBJS-$(CONFIG_H264PRED)          += h264pred.o
 AVCODECOBJS-$(CONFIG_H264QPEL)          += h264qpel.o
 AVCODECOBJS-$(CONFIG_IDCTDSP)           += idctdsp.o
+AVCODECOBJS-$(CONFIG_LLAUDDSP)          += llauddsp.o
 AVCODECOBJS-$(CONFIG_LLVIDDSP)          += llviddsp.o
 AVCODECOBJS-$(CONFIG_LLVIDENCDSP)       += llviddspenc.o
 AVCODECOBJS-$(CONFIG_LPC)               += lpc.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 51e97686cb..0cff1c45cb 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -143,6 +143,9 @@  static const struct {
     #if CONFIG_JPEG2000_DECODER
         { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
     #endif
+    #if CONFIG_LLAUDDSP
+        { "llauddsp", checkasm_check_llauddsp },
+    #endif
     #if CONFIG_HUFFYUVDSP
         { "llviddsp", checkasm_check_llviddsp },
     #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 5638a7fa87..c379ac3c0c 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -70,6 +70,7 @@  void checkasm_check_hevc_sao(void);
 void checkasm_check_huffyuvdsp(void);
 void checkasm_check_idctdsp(void);
 void checkasm_check_jpeg2000dsp(void);
+void checkasm_check_llauddsp(void);
 void checkasm_check_llviddsp(void);
 void checkasm_check_llviddspenc(void);
 void checkasm_check_lpc(void);
diff --git a/tests/checkasm/llauddsp.c b/tests/checkasm/llauddsp.c
new file mode 100644
index 0000000000..e6eab589b2
--- /dev/null
+++ b/tests/checkasm/llauddsp.c
@@ -0,0 +1,115 @@ 
+/*
+ * Copyright (c) 2016 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+#include "libavutil/mem_internal.h"
+
+#include "libavcodec/lossless_audiodsp.h"
+
+#include "checkasm.h"
+
+/* Store one rnd() result in each of the first len elements of buf. */
+#define randomize_buf(buf, len) \
+    do { \
+        for (int i = 0; i < (len); i++) (buf)[i] = rnd(); \
+    } while (0)
+
+/* Check c->scalarproduct_and_madd_int16 against the reference on random input. */
+static void check_scalarproduct_and_madd_int16(LLAudDSPContext *c)
+{
+#define BUF_SIZE 1088 // multiple of 16
+    LOCAL_ALIGNED_16(int16_t, base, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(int16_t, in_a, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(int16_t, in_b, [BUF_SIZE]);
+    int mul;
+
+    declare_func(int32_t, int16_t *, const int16_t *, const int16_t *, int, int);
+
+    /* Fill order is kept as-is: every element and mul are drawn from rnd(). */
+    randomize_buf(base, BUF_SIZE);
+    randomize_buf(in_a, BUF_SIZE);
+    randomize_buf(in_b, BUF_SIZE);
+    mul = rnd();
+
+    if (check_func(c->scalarproduct_and_madd_int16, "scalarproduct_and_madd_int16")) {
+        LOCAL_ALIGNED_16(int16_t, out_ref, [BUF_SIZE]);
+        LOCAL_ALIGNED_16(int16_t, out_new, [BUF_SIZE]);
+        const size_t nbytes = BUF_SIZE * sizeof (*out_ref);
+        int ret_ref, ret_new;
+
+        /* Both implementations start from identical copies of the first operand. */
+        memcpy(out_ref, base, nbytes);
+        memcpy(out_new, base, nbytes);
+        ret_ref = call_ref(out_ref, in_a, in_b, BUF_SIZE, mul);
+        ret_new = call_new(out_new, in_a, in_b, BUF_SIZE, mul);
+        if (ret_ref != ret_new || memcmp(out_ref, out_new, nbytes) != 0)
+            fail();
+        bench_new(base, in_a, in_b, BUF_SIZE, mul);
+    }
+    report("scalarproduct_and_madd_int16");
+}
+
+/* Check c->scalarproduct_and_madd_int32 (int32_t second operand) against the reference. */
+static void check_scalarproduct_and_madd_int32(LLAudDSPContext *c)
+{
+#define BUF_SIZE 1088 // multiple of 16
+    LOCAL_ALIGNED_16(int16_t, base, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(int32_t, in_a, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(int16_t, in_b, [BUF_SIZE]);
+    int mul;
+
+    declare_func(int32_t, int16_t *, const int32_t *, const int16_t *, int, int);
+
+    /* Fill order is kept as-is: every element and mul are drawn from rnd(). */
+    randomize_buf(base, BUF_SIZE);
+    randomize_buf(in_a, BUF_SIZE);
+    randomize_buf(in_b, BUF_SIZE);
+    mul = rnd();
+
+    if (check_func(c->scalarproduct_and_madd_int32, "scalarproduct_and_madd_int32")) {
+        LOCAL_ALIGNED_16(int16_t, out_ref, [BUF_SIZE]);
+        LOCAL_ALIGNED_16(int16_t, out_new, [BUF_SIZE]);
+        const size_t nbytes = BUF_SIZE * sizeof (*out_ref);
+        int ret_ref, ret_new;
+
+        /* Both implementations start from identical copies of the first operand. */
+        memcpy(out_ref, base, nbytes);
+        memcpy(out_new, base, nbytes);
+        ret_ref = call_ref(out_ref, in_a, in_b, BUF_SIZE, mul);
+        ret_new = call_new(out_new, in_a, in_b, BUF_SIZE, mul);
+        if (ret_ref != ret_new || memcmp(out_ref, out_new, nbytes) != 0)
+            fail();
+        bench_new(base, in_a, in_b, BUF_SIZE, mul);
+    }
+    report("scalarproduct_and_madd_int32");
+}
+
+/* checkasm entry point: initialise an LLAudDSPContext and run both checks on it. */
+void checkasm_check_llauddsp(void)
+{
+    LLAudDSPContext dsp;
+    ff_llauddsp_init(&dsp);
+    check_scalarproduct_and_madd_int16(&dsp);
+    check_scalarproduct_and_madd_int32(&dsp);
+}