diff mbox series

[FFmpeg-devel,1/6] lavc/ac3dsp: RISC-V V ac3_exponent_min

Message ID 20230615103645.25778-2-shenpeiting@eswincomputing.com
State New
Headers show
Series RISC-V initial ac3dsp | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

沈佩婷 June 15, 2023, 10:36 a.m. UTC
From: Shen Peiting <shenpeiting@eswincomputing.com>

Find scalar minimum optimized by using RVV instructions

Benchmarks on Spike(cycles):
*exp=1280*4;num_reuse_blocks=5;nb_coefs=16
ac3_exponent_min_c: 1993
ac3_exponent_min_rvv: 258
*exp=1280*4;num_reuse_blocks=19;nb_coefs=255
ac3_exponent_min_c: 99010
ac3_exponent_min_rvv: 3843

The speed-up becomes more pronounced as the number of reuse blocks and the
number of coefficients increase.

Co-authored-by: Yang Xiaojun <yangxiaojun@eswincomputing.com>
Co-authored-by: Huang Xing <huangxing1@eswincomputing.com>
Co-authored-by: Zeng Fanchen <zengfanchen@eswincomputing.com>
Signed-off-by: Shen Peiting <shenpeiting@eswincomputing.com>
---
 libavcodec/ac3dsp.c            |  2 ++
 libavcodec/ac3dsp.h            |  1 +
 libavcodec/riscv/Makefile      |  2 ++
 libavcodec/riscv/ac3dsp_init.c | 37 +++++++++++++++++++++++++++
 libavcodec/riscv/ac3dsp_rvv.S  | 46 ++++++++++++++++++++++++++++++++++
 5 files changed, 88 insertions(+)
 create mode 100644 libavcodec/riscv/ac3dsp_init.c
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

Comments

Rémi Denis-Courmont June 15, 2023, 6:02 p.m. UTC | #1
Nihao

Le torstaina 15. kesäkuuta 2023, 13.36.40 EEST Peiting Shen a écrit :
> From: Shen Peiting <shenpeiting@eswincomputing.com>
> 
> Find scalar minimum optimized by using RVV instructions
> 
> Benchmarks on Spike(cycles):
> *exp=1280*4;num_reuse_blocks=5;nb_coefs=16
> ac3_exponent_min_c: 1993
> ac3_exponent_min_rvv: 258
> *exp=1280*4;num_reuse_blocks=19;nb_coefs=255
> ac3_exponent_min_c: 99010
> ac3_exponent_min_rvv: 3843
> 
> The optimization performance is more obvious with the increase of number of
> reuse blocks and number of coefs.
> 
> Co-Authored by: Yang Xiaojun <yangxiaojun@eswincomputing.com>
> Co-Authored by: Huang Xing <huangxing1@eswincomputing.com>
> Co-Authored by: Zeng Fanchen <zengfanchen@eswincomputing.com>
> Signed-off-by: Shen Peiting <shenpeiting@eswincomputing.com>
> ---
>  libavcodec/ac3dsp.c            |  2 ++
>  libavcodec/ac3dsp.h            |  1 +
>  libavcodec/riscv/Makefile      |  2 ++
>  libavcodec/riscv/ac3dsp_init.c | 37 +++++++++++++++++++++++++++
>  libavcodec/riscv/ac3dsp_rvv.S  | 46 ++++++++++++++++++++++++++++++++++
>  5 files changed, 88 insertions(+)
>  create mode 100644 libavcodec/riscv/ac3dsp_init.c
>  create mode 100644 libavcodec/riscv/ac3dsp_rvv.S
> 
> diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
> index 22cb5f242e..302b786b15 100644
> --- a/libavcodec/ac3dsp.c
> +++ b/libavcodec/ac3dsp.c
> @@ -395,5 +395,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c)
>      ff_ac3dsp_init_x86(c);
>  #elif ARCH_MIPS
>      ff_ac3dsp_init_mips(c);
> +#elif ARCH_RISCV
> +    ff_ac3dsp_init_riscv(c);
>  #endif
>  }
> diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
> index 33e51e202e..a01bff3d11 100644
> --- a/libavcodec/ac3dsp.h
> +++ b/libavcodec/ac3dsp.h
> @@ -109,6 +109,7 @@ void ff_ac3dsp_init    (AC3DSPContext *c);
>  void ff_ac3dsp_init_arm(AC3DSPContext *c);
>  void ff_ac3dsp_init_x86(AC3DSPContext *c);
>  void ff_ac3dsp_init_mips(AC3DSPContext *c);
> +void ff_ac3dsp_init_riscv(AC3DSPContext *c);
> 
>  void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
>                         int out_ch, int in_ch, int len);
> diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> index ee17a521fd..a627924cac 100644
> --- a/libavcodec/riscv/Makefile
> +++ b/libavcodec/riscv/Makefile
> @@ -1,5 +1,7 @@
>  OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o
>  RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o
> +OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o
> +RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o
>  OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
>  RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
>  OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
> diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
> new file mode 100644
> index 0000000000..bb67d86998
> --- /dev/null
> +++ b/libavcodec/riscv/ac3dsp_init.c
> @@ -0,0 +1,37 @@
> +/*
> + * Copyright 2023 Beijing ESWIN Computing Technology Co., Ltd.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +#include <stdint.h>
> +
> +#include "libavutil/attributes.h"
> +#include "libavcodec/ac3dsp.h"
> +#include "libavutil/cpu.h"
> +#include "config.h"
> +
> +void ff_ac3_exponent_min_rvv(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
> +
> +av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
> +{
> +    int flags = av_get_cpu_flags();
> +#if HAVE_RVV
> +    if (flags & AV_CPU_FLAG_RVV_I32)
> +        c->ac3_exponent_min = ff_ac3_exponent_min_rvv;
> +#endif
> +}
> +
> diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
> new file mode 100644
> index 0000000000..879123f4a7
> --- /dev/null
> +++ b/libavcodec/riscv/ac3dsp_rvv.S
> @@ -0,0 +1,46 @@
> +/*
> + * Copyright 2023 Beijing ESWIN Computing Technology Co., Ltd.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +func ff_ac3_exponent_min_rvv, zve32x
> +    beq             a1, x0, 3f

Conventionally, we use ABI names for GP and FP registers like almost everybody 
else and their moms in the RISC-V world. So that would be `zero`.

But in this case, you should use the `beqz` alias anyway.

> +    li              t0, 256
> +    addi            a1, a1, 1
> +1:
> +    mv              t2, a0

AFAICT, t2 is always the same as a0, and thus this is unnecessary.

> +    mv              t3, a1
> +    lb              t4, (t2)
> +2:
> +    vsetvli         t1, t3, e8, m8
> +    vlse8.v         v0, (t2), t0
> +    vmv.s.x         v8, t4
> +    sub             t3, t3, t1
> +    vredminu.vs     v8, v0, v8
> +    vmv.x.s         t4, v8
> +    bnez            t3, 2b
> +    vsetivli        t1, 1, e8

You're not using the output, so use zero.

But you don't even need to reset the vector configuration here. Just use 
masking to store the one element (you could also transfer to scalar and store, 
but that's probably slower than masking).

> +    vse8.v          v8, (a0)
> +    addi            a0, a0, 1
> +    addi            a2, a2, -1

This will stall on an in-order CPU. Please avoid immediately consecutive 
interdependent instructions.

> +    bnez            a2, 1b
> +3:
> +    ret
> +endfunc
diff mbox series

Patch

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 22cb5f242e..302b786b15 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -395,5 +395,7 @@  av_cold void ff_ac3dsp_init(AC3DSPContext *c)
     ff_ac3dsp_init_x86(c);
 #elif ARCH_MIPS
     ff_ac3dsp_init_mips(c);
+#elif ARCH_RISCV
+    ff_ac3dsp_init_riscv(c);
 #endif
 }
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index 33e51e202e..a01bff3d11 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -109,6 +109,7 @@  void ff_ac3dsp_init    (AC3DSPContext *c);
 void ff_ac3dsp_init_arm(AC3DSPContext *c);
 void ff_ac3dsp_init_x86(AC3DSPContext *c);
 void ff_ac3dsp_init_mips(AC3DSPContext *c);
+void ff_ac3dsp_init_riscv(AC3DSPContext *c);
 
 void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
                        int out_ch, int in_ch, int len);
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index ee17a521fd..a627924cac 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,5 +1,7 @@ 
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o
+OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o
+RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
new file mode 100644
index 0000000000..bb67d86998
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -0,0 +1,37 @@ 
+/*
+ * Copyright 2023 Beijing ESWIN Computing Technology Co., Ltd.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavutil/cpu.h"
+#include "config.h"
+
+void ff_ac3_exponent_min_rvv(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+
+av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
+{
+    int flags = av_get_cpu_flags();
+#if HAVE_RVV
+    if (flags & AV_CPU_FLAG_RVV_I32)
+        c->ac3_exponent_min = ff_ac3_exponent_min_rvv;
+#endif
+}
+
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..879123f4a7
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,46 @@ 
+/*
+ * Copyright 2023 Beijing ESWIN Computing Technology Co., Ltd.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ac3_exponent_min_rvv, zve32x
+    beq             a1, x0, 3f
+    li              t0, 256
+    addi            a1, a1, 1
+1:
+    mv              t2, a0
+    mv              t3, a1
+    lb              t4, (t2)
+2:
+    vsetvli         t1, t3, e8, m8
+    vlse8.v         v0, (t2), t0
+    vmv.s.x         v8, t4
+    sub             t3, t3, t1
+    vredminu.vs     v8, v0, v8
+    vmv.x.s         t4, v8
+    bnez            t3, 2b
+    vsetivli        t1, 1, e8
+    vse8.v          v8, (a0)
+    addi            a0, a0, 1
+    addi            a2, a2, -1
+    bnez            a2, 1b
+3:
+    ret
+endfunc