diff mbox series

[FFmpeg-devel,6/6] lavc/ac3dsp: RISC-V B ac3_extract_exponents

Message ID 20230615103645.25778-7-shenpeiting@eswincomputing.com
State New
Headers show
Series RISC-V initial ac3dsp | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

沈佩婷 June 15, 2023, 10:36 a.m. UTC
From: Shen Peiting <shenpeiting@eswincomputing.com>

Use the RISC-V Zbb instruction clz to count leading zeros directly instead of deriving the MSB position with av_log2.

Benchmarks on Spike (cycles):
ac3_extract_exponents_c: 8226
ac3_extract_exponents_rvb: 1167

Co-authored-by: Yang Xiaojun <yangxiaojun@eswincomputing.com>
Co-authored-by: Huang Xing <huangxing1@eswincomputing.com>
Co-authored-by: Zeng Fanchen <zengfanchen@eswincomputing.com>
Signed-off-by: Shen Peiting <shenpeiting@eswincomputing.com>
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  3 +++
 libavcodec/riscv/ac3dsp_rvb.S  | 42 ++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvb.S

Comments

Rémi Denis-Courmont June 15, 2023, 7:18 p.m. UTC | #1
Le torstaina 15. kesäkuuta 2023, 13.36.45 EEST Peiting Shen a écrit :
> From: Shen Peiting <shenpeiting@eswincomputing.com>
> 
> Use RVB instruction clz to calculate the number of leading zeros of MSB
> instead of av_log2.
> 
> Benchmarks on Spike(cycles):
> ac3_extract_exponents_c: 8226
> ac3_extract_exponents_rvb: 1167

FWIW, RV-Zbb can be benchmarked on real hardware.

I would have done it already if only there was a checkasm case for this.
diff mbox series

Patch

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index a627924cac..3d0c196cb9 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@ 
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o
-RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o
+RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o \
+                             riscv/ac3dsp_rvb.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 4769213ebc..75cd3c7e11 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,6 +26,7 @@ 
 
 void ff_ac3_exponent_min_rvv(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
 void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len);
+void ff_ac3_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
 void ff_ac3_sum_square_butterfly_int32_rvv(int64_t sum[4],
                                             const int32_t *coef0,
                                             const int32_t *coef1,
@@ -40,6 +41,8 @@  void ff_ac3_compute_mantissa_size_rvv(uint16_t mant_cnt[6][16]);
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
     int flags = av_get_cpu_flags();
+    if (flags & AV_CPU_FLAG_RVB_BASIC)
+        c->extract_exponents = ff_ac3_extract_exponents_rvb;
 #if HAVE_RVV
     if (flags & AV_CPU_FLAG_RVV_I32) {
         c->ac3_exponent_min = ff_ac3_exponent_min_rvv;
diff --git a/libavcodec/riscv/ac3dsp_rvb.S b/libavcodec/riscv/ac3dsp_rvb.S
new file mode 100644
index 0000000000..3bf24c7392
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvb.S
@@ -0,0 +1,42 @@ 
+/*
+ * Copyright 2023 Beijing ESWIN Computing Technology Co., Ltd.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_ac3_extract_exponents_rvb, zbb
+    /* a0 = exp (byte output), a1 = coef (32-bit input), a2 = nb_coefs. */
+    /* Stores clz32(|coef[i]|) - 8 per element; a zero coefficient gives 24. */
+    /* The count is tested last: one element is processed if nb_coefs <= 0. */
+1:
+    lw               t0, (a1)
+    addi             a2, a2, -1
+    neg              t1, t0
+    addi             a1, a1, 4
+    max              t0, t0, t1  /* branchless |coef| */
+    clz              t0, t0      /* counts over XLEN bits */
+    addi             t0, t0, 24 - __riscv_xlen  /* remove the XLEN - 24 bias */
+    sb               t0, (a0)
+    addi             a0, a0, 1
+
+    bgtz             a2, 1b
+
+    ret
+endfunc
\ No newline at end of file