diff mbox series

[FFmpeg-devel,v4,3/5] avcodec/ac3: Implement ac3_extract_exponents for aarch64 NEON

Message ID cc93f613-90a0-4d85-8530-d2cdebe1c624@geoffhill.org
State Accepted
Commit 69cb34f8859ed62fd1c46d3612912a334542fe28
Headers show
Series avcodec/ac3: Add aarch64 NEON DSP | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Geoff Hill April 6, 2024, 2:26 p.m. UTC
Signed-off-by: Geoff Hill <geoff@geoffhill.org>
---
 libavcodec/aarch64/ac3dsp_init_aarch64.c |  2 ++
 libavcodec/aarch64/ac3dsp_neon.S         | 14 +++++++++
 tests/checkasm/ac3dsp.c                  | 38 ++++++++++++++++++++++++
 3 files changed, 54 insertions(+)
diff mbox series

Patch

diff --git a/libavcodec/aarch64/ac3dsp_init_aarch64.c b/libavcodec/aarch64/ac3dsp_init_aarch64.c
index 8874b41393..1bdc215b51 100644
--- a/libavcodec/aarch64/ac3dsp_init_aarch64.c
+++ b/libavcodec/aarch64/ac3dsp_init_aarch64.c
@@ -26,6 +26,7 @@ 
 #include "config.h"
 
 void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
 void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
 
 av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
@@ -34,5 +35,6 @@  av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
     if (!have_neon(cpu_flags)) return;
 
     c->ac3_exponent_min = ff_ac3_exponent_min_neon;
+    c->extract_exponents = ff_ac3_extract_exponents_neon;
     c->float_to_fixed24 = ff_float_to_fixed24_neon;
 }
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
index f916c32538..c350c1f173 100644
--- a/libavcodec/aarch64/ac3dsp_neon.S
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -37,6 +37,20 @@  function ff_ac3_exponent_min_neon, export=1
 3:      ret
 endfunc
 
+function ff_ac3_extract_exponents_neon, export=1 // x0 = exp (byte output), x1 = coef (int32 input), w2 = nb_coefs
+        movi            v1.4s, #8                // v1 = 8 in every 32-bit lane, the bias removed from each leading-zero count
+1:      ld1             {v0.4s}, [x1], #16       // load four 32-bit coefficients and advance x1 by 16 bytes
+        abs             v0.4s, v0.4s             // per-lane absolute value
+        clz             v0.4s, v0.4s             // per-lane count of leading zero bits
+        sub             v0.4s, v0.4s, v1.4s      // exponent = clz - 8; NOTE(review): assumes |coef| < 2^24 so this stays non-negative - confirm
+        xtn             v0.4h, v0.4s             // narrow the four 32-bit lanes to 16 bits
+        xtn             v0.8b, v0.8h             // narrow again to 8 bits; the four exponents now sit in the low 32 bits of v0
+        st1             {v0.s}[0], [x0], #4      // store those four exponent bytes and advance x0 by 4
+        subs            w2, w2, #4               // four coefficients consumed
+        b.gt            1b                       // repeat while w2 > 0; the body always runs once, so presumably nb_coefs is a positive multiple of 4 - confirm with callers
+        ret
+endfunc
+
 function ff_float_to_fixed24_neon, export=1
 1:      ld1             {v0.4s, v1.4s}, [x1], #32
         fcvtzs          v0.4s, v0.4s, #24
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
index 06f31339f9..dc1b169e68 100644
--- a/tests/checkasm/ac3dsp.c
+++ b/tests/checkasm/ac3dsp.c
@@ -19,6 +19,7 @@ 
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdint.h>
 #include <string.h>
 
 #include "libavutil/mem.h"
@@ -36,6 +37,16 @@ 
         }                              \
     } while (0)
 
+#define randomize_i24(buf, len) /* fill buf[0..len) with values in [-0xFFFFFF, 0xFFFFFF] derived from rnd() */ \
+    do {                                 \
+        int i;                           \
+        for (i = 0; i < len; i++) {      \
+            int32_t v = (int32_t)rnd();  \
+            int32_t u = (v & 0xFFFFFF); /* low 24 bits of the draw become the magnitude */ \
+            buf[i] = (v < 0) ? -u : u; /* sign of the raw draw selects the sign of the result */ \
+        }                                \
+    } while (0)
+
 #define randomize_float(buf, len)                               \
     do {                                                        \
         int i;                                                  \
@@ -77,6 +88,32 @@  static void check_ac3_exponent_min(AC3DSPContext *c) {
     report("ac3_exponent_min");
 }
 
+static void check_ac3_extract_exponents(AC3DSPContext *c) { /* Runs c->extract_exponents through call_ref/call_new for several sizes and fails on any output mismatch. */
+#define MAX_EXPS 3072 /* largest coefficient count exercised; also the element count of the three buffers below */
+    LOCAL_ALIGNED_16(int32_t, src, [MAX_EXPS]); /* input coefficients */
+    LOCAL_ALIGNED_16(uint8_t, v1, [MAX_EXPS]); /* output buffer passed to call_ref */
+    LOCAL_ALIGNED_16(uint8_t, v2, [MAX_EXPS]); /* output buffer passed to call_new */
+    int n;
+
+    declare_func(void, uint8_t *, int32_t *, int);
+
+    for (n = 512; n <= MAX_EXPS; n += 256) { /* n = 512, 768, ..., 3072 */
+        if (check_func(c->extract_exponents, "ac3_extract_exponents_n%d", n)) {
+            randomize_i24(src, n); /* fresh input with 24-bit magnitudes for this size */
+
+            call_ref(v1, src, n);
+            call_new(v2, src, n);
+
+            if (memcmp(v1, v2, n) != 0) /* the first n output bytes must be identical */
+                fail();
+
+            bench_new(v1, src, n); /* writes into v1, which has already been compared above */
+        }
+    }
+
+    report("ac3_extract_exponents");
+}
+
 static void check_float_to_fixed24(AC3DSPContext *c) {
 #define BUF_SIZE 1024
     LOCAL_ALIGNED_32(float, src, [BUF_SIZE]);
@@ -108,5 +145,6 @@  void checkasm_check_ac3dsp(void)
     ff_ac3dsp_init(&c);
 
     check_ac3_exponent_min(&c);
+    check_ac3_extract_exponents(&c);
     check_float_to_fixed24(&c);
 }