new file mode 100644
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/riscv/h26x/asm.S"
+
+/*
+ * put_uni_pixels w, vlen, id
+ *
+ * Emits a row-by-row byte copy under the label <id><w><vlen> and returns to
+ * the caller with ret.
+ *
+ * Register use, following the C prototype in h2656dsp.h and the standard
+ * RISC-V calling convention:
+ *   a0 = dst, a1 = dst stride, a2 = src, a3 = src stride, a4 = height
+ *
+ * Each iteration loads one vector from (a2), stores it to (a0) and advances
+ * both pointers by their strides. The row counter in a4 is decremented
+ * before it is tested, so the body always runs at least once; a4 == 0 on
+ * entry is not handled.
+ *
+ * NOTE(review): vsetvlstatic8 is not defined in this file (presumably it
+ * comes from the included asm.S) and is assumed to select a vector length
+ * of w 8-bit elements for the given vlen - confirm.
+ */
+.macro put_uni_pixels w, vlen, id
+\id\w\vlen:
+.if \w == 128 && \vlen == 128
+ /* w = 128 with vlen = 128: request vl = w explicitly with SEW=8, LMUL=8 */
+ li t0, \w
+ vsetvli zero, t0, e8, m8, ta, ma
+.else
+ vsetvlstatic8 \w, \vlen
+.endif
+1:
+ vle8.v v0, (a2)
+ /* counter update sits between the load and the store */
+ addi a4, a4, -1
+ vse8.v v0, (a0)
+ /* step source and destination to the next row */
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+ ret
+.endm
+
+/*
+ * func_put_uni_pixels vlen
+ *
+ * Defines ff_h2656_put_uni_pixels_8_rvv_<vlen> (declared with the zve32x,
+ * zbb and zba extensions) and expands put_uni_pixels once for each width
+ * 2, 4, 8, 16, 32, 64 and 128, all with id 4.
+ *
+ * NOTE(review): func, lpad, POW2_JMP_TABLE and POW2_J are defined outside
+ * this file. Presumably POW2_JMP_TABLE emits a table of the per-width labels
+ * for id 4 and POW2_J jumps to the entry selected by a7, which is the width
+ * argument according to the prototype in h2656dsp.h - confirm against the
+ * macro definitions.
+ */
+.macro func_put_uni_pixels vlen
+func ff_h2656_put_uni_pixels_8_rvv_\vlen\(), zve32x, zbb, zba
+ lpad 0
+ POW2_JMP_TABLE 4, \vlen
+ POW2_J \vlen, 4, a7
+ /* one copy routine per supported width; the id matches the table above */
+ .irp w,2,4,8,16,32,64,128
+ put_uni_pixels \w, \vlen, 4
+ .endr
+endfunc
+.endm
+
+/* Instantiate the function for the vlen values 256 and 128. */
+func_put_uni_pixels 256
+func_put_uni_pixels 128
new file mode 100644
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_H26X_H2656DSP_H
+#define AVCODEC_RISCV_H26X_H2656DSP_H
+
+/* ptrdiff_t and the fixed-width integer types used by the prototypes below */
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * Expands to the prototype (without a terminating semicolon) of the
+ * assembly routine ff_h2656_put_uni_<name>_<D>_<opt>.
+ *
+ *   _dst, _dststride  destination buffer and its stride
+ *   _src, _srcstride  source buffer and its stride
+ *   height            number of rows to process
+ *   hf, vf            filter pointers (NOTE(review): presumably horizontal
+ *                     and vertical filter taps; the pixels kernels in
+ *                     h2656_inter_rvv.S never read them - confirm)
+ *   width             block width
+ */
+#define H2656_PEL_PROTOTYPE(name, D, opt) \
+void ff_h2656_put_uni_ ## name ## _ ## D ## _ ## opt(uint8_t *_dst, ptrdiff_t _dststride, \
+    const uint8_t *_src, ptrdiff_t _srcstride, \
+    int height, const int8_t *hf, const int8_t *vf, int width)
+
+/*
+ * Declares the routines of one function family for a given bit depth and
+ * CPU flavour. The expansion already ends with a semicolon, so invoke the
+ * macro without a trailing ';' to avoid an empty file-scope declaration.
+ */
+#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \
+    H2656_PEL_PROTOTYPE(fname, bitd, opt);
+
+H2656_MC_8TAP_PROTOTYPES(pixels, 8, rvv_256)
+H2656_MC_8TAP_PROTOTYPES(pixels, 8, rvv_128)
+
+#endif /* AVCODEC_RISCV_H26X_H2656DSP_H */
@@ -1,2 +1,3 @@
OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o
-RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o
+RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \
+ riscv/h26x/h2656_inter_rvv.o
@@ -25,6 +25,7 @@
#include "libavutil/riscv/cpu.h"
#include "libavcodec/vvc/dsp.h"
#include "libavcodec/vvc/dec.h"
+#include "libavcodec/riscv/h26x/h2656dsp.h"
#define bf(fn, bd, opt) fn##_##bd##_##opt
@@ -72,8 +73,12 @@ PUT_PIXELS_PROTOTYPES2(8, rvv_256)
c->inter.dst[C][w][idx1][idx2] = a; \
} while (0) \
+/*
+ * Passes ff_h2656_put_<d>_pixels_8_<opt> to PEL_FUNC as the put_<d> entry
+ * for component C, with both index arguments set to 0.
+ * NOTE(review): only the tail of PEL_FUNC is visible in this hunk;
+ * presumably it stores the function pointer into the matching c->inter
+ * table for each block width - confirm against the full definition.
+ */
+#define DIR_FUNCS(d, C, opt) \
+ PEL_FUNC(put_##d, C, 0, 0, ff_h2656_put_##d##_pixels_8_##opt); \
+
#define FUNCS(C, opt) \
PEL_FUNC(put, C, 0, 0, ff_vvc_put_pixels_8_##opt); \
+ DIR_FUNCS(uni, C, opt); \
void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd)
{
From: sunyuechi <sunyuechi@iscas.ac.cn>

                                                  k230               banana_f3
put_uni_pixels_chroma_8_4x4_c:                   128.3 ( 1.00x)     90.5 ( 1.00x)
put_uni_pixels_chroma_8_4x4_rvv_i32:              17.6 ( 7.30x)     17.4 ( 5.18x)
put_uni_pixels_chroma_8_8x8_c:                   295.1 ( 1.00x)    163.2 ( 1.00x)
put_uni_pixels_chroma_8_8x8_rvv_i32:              35.8 ( 8.24x)     27.9 ( 5.84x)
put_uni_pixels_chroma_8_16x16_c:                 619.3 ( 1.00x)    267.4 ( 1.00x)
put_uni_pixels_chroma_8_16x16_rvv_i32:            72.8 ( 8.50x)     48.7 ( 5.49x)
put_uni_pixels_chroma_8_32x32_c:                1433.8 ( 1.00x)    538.2 ( 1.00x)
put_uni_pixels_chroma_8_32x32_rvv_i32:           230.3 ( 6.23x)    236.2 ( 2.28x)
put_uni_pixels_chroma_8_64x64_c:                3517.3 ( 1.00x)   1455.0 ( 1.00x)
put_uni_pixels_chroma_8_64x64_rvv_i32:           813.6 ( 4.32x)    590.2 ( 2.47x)
put_uni_pixels_chroma_8_128x128_c:             10174.6 ( 1.00x)   5798.7 ( 1.00x)
put_uni_pixels_chroma_8_128x128_rvv_i32:        2989.3 ( 3.40x)   2371.4 ( 2.45x)
put_uni_pixels_luma_8_4x4_c:                     128.6 ( 1.00x)     90.5 ( 1.00x)
put_uni_pixels_luma_8_4x4_rvv_i32:                17.3 ( 7.42x)     17.4 ( 5.18x)
put_uni_pixels_luma_8_8x8_c:                     295.1 ( 1.00x)    142.4 ( 1.00x)
put_uni_pixels_luma_8_8x8_rvv_i32:                26.6 (11.10x)     27.9 ( 5.10x)
put_uni_pixels_luma_8_16x16_c:                   600.6 ( 1.00x)    277.7 ( 1.00x)
put_uni_pixels_luma_8_16x16_rvv_i32:              82.1 ( 7.32x)     48.7 ( 5.70x)
put_uni_pixels_luma_8_32x32_c:                  1406.1 ( 1.00x)    528.0 ( 1.00x)
put_uni_pixels_luma_8_32x32_rvv_i32:             230.3 ( 6.10x)    131.9 ( 4.00x)
put_uni_pixels_luma_8_64x64_c:                  4600.6 ( 1.00x)   1309.2 ( 1.00x)
put_uni_pixels_luma_8_64x64_rvv_i32:            1073.1 ( 4.29x)    382.2 ( 3.43x)
put_uni_pixels_luma_8_128x128_c:               11350.3 ( 1.00x)   3506.9 ( 1.00x)
put_uni_pixels_luma_8_128x128_rvv_i32:          3119.1 ( 3.64x)   2017.5 ( 1.74x)
---
 libavcodec/riscv/h26x/h2656_inter_rvv.S | 53 +++++++++++++++++++++++++
 libavcodec/riscv/h26x/h2656dsp.h        | 33 +++++++++++++++
 libavcodec/riscv/vvc/Makefile           |  3 +-
 libavcodec/riscv/vvc/vvcdsp_init.c      |  5 +++
 4 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/h26x/h2656_inter_rvv.S
 create mode 100644 libavcodec/riscv/h26x/h2656dsp.h