@@ -28,6 +28,9 @@
void ff_int32_to_float_fmul_scalar_rvv(float *dst, const int32_t *src,
float mul, int len);
+void ff_int32_to_float_fmul_array8_rvv(FmtConvertContext *c, float *dst, /* RVV routine defined with func/endfunc later in this patch */
+ const int32_t *src, const float *mul, /* mul is an array: one factor per 8 consecutive samples */
+ int len); /* the routine shifts len right by 3 to get its group count */
av_cold void ff_fmt_convert_init_riscv(FmtConvertContext *c,
AVCodecContext *avctx)
@@ -35,7 +38,9 @@ av_cold void ff_fmt_convert_init_riscv(FmtConvertContext *c,
#ifdef HAVE_RVV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RV_ZVE32F)
+ if (flags & AV_CPU_FLAG_RV_ZVE32F) {
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_rvv;
+ c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_rvv;
+ }
#endif
}
@@ -38,3 +38,32 @@ NOHWF mv a2, a3
ret
endfunc
+
+func ff_int32_to_float_fmul_array8_rvv, zve32f /* dst[8*i+k] = (float)src[8*i+k] * mul[i] for k = 0..7; per the C prototype's argument order: a1 = dst, a2 = src, a3 = mul, a4 = len (a0 = c is not read) */
+ srai a4, a4, 3 /* a4 = len >> 3: number of 8-sample groups, each with its own multiplier */
+
+1: vsetvli t0, a4, e32, m1, ta, ma /* t0 = number of groups processed in this iteration */
+ vle32.v v24, (a3) /* v24[i] = multiplier of group i */
+ slli t1, t0, 2 /* t1 = bytes of multipliers consumed (4 per group) */
+ vlseg8e32.v v16, (a2) /* de-interleave: v(16+k)[i] = sample k of group i */
+ slli t2, t0, 2 + 3 /* t2 = bytes of samples consumed (8 * 4 per group) */
+ vsetvli t3, zero, e32, m8, ta, ma /* LMUL=8 at maximum length so v16..v23 act as one register group; t3 is not used afterwards */
+ vfcvt.f.x.v v16, v16 /* int32 -> float for all of v16..v23 at once (elements past t0 are converted too but never stored) */
+ add a3, a3, t1 /* advance mul */
+ vsetvli t0, a4, e32, m1, ta, ma /* back to LMUL=1; a4 is still unchanged, so this requests the same group count again */
+ vfmul.vv v16, v16, v24 /* scale sample 0 of every group by that group's multiplier */
+ add a2, a2, t2 /* advance src */
+ vfmul.vv v17, v17, v24 /* same for samples 1..7 below */
+ sub a4, a4, t0 /* groups still to do */
+ vfmul.vv v18, v18, v24
+ vfmul.vv v19, v19, v24
+ vfmul.vv v20, v20, v24
+ vfmul.vv v21, v21, v24
+ vfmul.vv v22, v22, v24
+ vfmul.vv v23, v23, v24
+ vsseg8e32.v v16, (a1) /* re-interleave v16..v23 and store t0 groups to dst */
+ add a1, a1, t2 /* advance dst (same byte stride as src) */
+ bnez a4, 1b /* loop until no group remains */
+
+ ret
+endfunc
From: Rémi Denis-Courmont <remi@remlab.net> --- libavcodec/riscv/fmtconvert_init.c | 7 ++++++- libavcodec/riscv/fmtconvert_rvv.S | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-)