Message ID | tencent_FDD6731858AF61F23CD1AA2D168D4373C508@qq.com |
---|---|
State | New |
Headers | show |
Series | None | expand |
> Up to 64-bit rows, you can use strided loads and stores here. Due to the SRC_OFFSET in testing, only e8 and e16 can be loaded; e32 cannot be loaded (Bus error). Since the width ranges from 4 to 128, it seems that strided loads may not be possible. > Though for memory copying, unaligned scalar accesses might be just as fast. > Or perhaps not if the vectors are not aligned but vectors should not be > necessary here. This is especially true on the BPi whose memory bus is rather > slow, so even scalar copy can saturate it. I agree in theory, but since the test results seem to show some effect, it would be great if we could improve the testing to confirm it actually has no effect... <uk7b@foxmail.com> 于2024年10月29日周二 01:08写道: > From: sunyuechi <sunyuechi@iscas.ac.cn> > > k230 > banana_f3 > put_uni_pixels_chroma_8_4x4_c: 128.3 ( 1.00x) > 90.5 ( 1.00x) > put_uni_pixels_chroma_8_4x4_rvv_i32: 17.6 ( 7.30x) > 17.4 ( 5.18x) > put_uni_pixels_chroma_8_8x8_c: 295.1 ( 1.00x) > 163.2 ( 1.00x) > put_uni_pixels_chroma_8_8x8_rvv_i32: 35.8 ( 8.24x) > 27.9 ( 5.84x) > put_uni_pixels_chroma_8_16x16_c: 619.3 ( 1.00x) > 267.4 ( 1.00x) > put_uni_pixels_chroma_8_16x16_rvv_i32: 72.8 ( 8.50x) > 48.7 ( 5.49x) > put_uni_pixels_chroma_8_32x32_c: 1433.8 ( 1.00x) > 538.2 ( 1.00x) > put_uni_pixels_chroma_8_32x32_rvv_i32: 230.3 ( 6.23x) > 236.2 ( 2.28x) > put_uni_pixels_chroma_8_64x64_c: 3517.3 ( 1.00x) > 1455.0 ( 1.00x) > put_uni_pixels_chroma_8_64x64_rvv_i32: 813.6 ( 4.32x) > 590.2 ( 2.47x) > put_uni_pixels_chroma_8_128x128_c: 10174.6 ( 1.00x) > 5798.7 ( 1.00x) > put_uni_pixels_chroma_8_128x128_rvv_i32: 2989.3 ( 3.40x) > 2371.4 ( 2.45x) > put_uni_pixels_luma_8_4x4_c: 128.6 ( 1.00x) > 90.5 ( 1.00x) > put_uni_pixels_luma_8_4x4_rvv_i32: 17.3 ( 7.42x) > 17.4 ( 5.18x) > put_uni_pixels_luma_8_8x8_c: 295.1 ( 1.00x) > 142.4 ( 1.00x) > put_uni_pixels_luma_8_8x8_rvv_i32: 26.6 (11.10x) > 27.9 ( 5.10x) > put_uni_pixels_luma_8_16x16_c: 600.6 ( 1.00x) > 277.7 ( 1.00x) > put_uni_pixels_luma_8_16x16_rvv_i32: 82.1 
( 7.32x) > 48.7 ( 5.70x) > put_uni_pixels_luma_8_32x32_c: 1406.1 ( 1.00x) > 528.0 ( 1.00x) > put_uni_pixels_luma_8_32x32_rvv_i32: 230.3 ( 6.10x) > 131.9 ( 4.00x) > put_uni_pixels_luma_8_64x64_c: 4600.6 ( 1.00x) > 1309.2 ( 1.00x) > put_uni_pixels_luma_8_64x64_rvv_i32: 1073.1 ( 4.29x) > 382.2 ( 3.43x) > put_uni_pixels_luma_8_128x128_c: 11350.3 ( 1.00x) > 3506.9 ( 1.00x) > put_uni_pixels_luma_8_128x128_rvv_i32: 3119.1 ( 3.64x) > 2017.5 ( 1.74x) > --- > libavcodec/riscv/h26x/h2656_inter_rvv.S | 53 +++++++++++++++++++++++++ > libavcodec/riscv/h26x/h2656dsp.h | 33 +++++++++++++++ > libavcodec/riscv/vvc/Makefile | 3 +- > libavcodec/riscv/vvc/vvcdsp_init.c | 5 +++ > 4 files changed, 93 insertions(+), 1 deletion(-) > create mode 100644 libavcodec/riscv/h26x/h2656_inter_rvv.S > create mode 100644 libavcodec/riscv/h26x/h2656dsp.h > > diff --git a/libavcodec/riscv/h26x/h2656_inter_rvv.S > b/libavcodec/riscv/h26x/h2656_inter_rvv.S > new file mode 100644 > index 0000000000..6692e33acf > --- /dev/null > +++ b/libavcodec/riscv/h26x/h2656_inter_rvv.S > @@ -0,0 +1,53 @@ > +/* > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences > (ISCAS). > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +#include "libavcodec/riscv/h26x/asm.S" > + > +.macro put_uni_pixels w, vlen, id > +\id\w\vlen: > +.if \w == 128 && \vlen == 128 > + li t0, \w > + vsetvli zero, t0, e8, m8, ta, ma > +.else > + vsetvlstatic8 \w, \vlen > +.endif > +1: > + vle8.v v0, (a2) > + addi a4, a4, -1 > + vse8.v v0, (a0) > + add a2, a2, a3 > + add a0, a0, a1 > + bnez a4, 1b > + ret > +.endm > + > +.macro func_put_uni_pixels vlen > +func ff_h2656_put_uni_pixels_8_rvv_\vlen\(), zve32x, zbb, zba > + lpad 0 > + POW2_JMP_TABLE 4, \vlen > + POW2_J \vlen, 4, a7 > + .irp w,2,4,8,16,32,64,128 > + put_uni_pixels \w, \vlen, 4 > + .endr > +endfunc > +.endm > + > +func_put_uni_pixels 256 > +func_put_uni_pixels 128 > diff --git a/libavcodec/riscv/h26x/h2656dsp.h > b/libavcodec/riscv/h26x/h2656dsp.h > new file mode 100644 > index 0000000000..41ba6bc331 > --- /dev/null > +++ b/libavcodec/riscv/h26x/h2656dsp.h > @@ -0,0 +1,33 @@ > +/* > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences > (ISCAS). > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +#ifndef AVCODEC_RISCV_H26X_H2656DSP_H > +#define AVCODEC_RISCV_H26X_H2656DSP_H > + > +#define H2656_PEL_PROTOTYPE(name, D, opt) \ > +void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, > ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int > height, const int8_t *hf, const int8_t *vf, int width) \ > + > +#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \ > + H2656_PEL_PROTOTYPE(fname, bitd, opt); \ > + > +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_256); > +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_128); > + > +#endif > diff --git a/libavcodec/riscv/vvc/Makefile b/libavcodec/riscv/vvc/Makefile > index 582b051579..ec116aebc1 100644 > --- a/libavcodec/riscv/vvc/Makefile > +++ b/libavcodec/riscv/vvc/Makefile > @@ -1,2 +1,3 @@ > OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o > -RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o > +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \ > + riscv/h26x/h2656_inter_rvv.o > diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c > b/libavcodec/riscv/vvc/vvcdsp_init.c > index bee892cb7c..9dea70f392 100644 > --- a/libavcodec/riscv/vvc/vvcdsp_init.c > +++ b/libavcodec/riscv/vvc/vvcdsp_init.c > @@ -25,6 +25,7 @@ > #include "libavutil/riscv/cpu.h" > #include "libavcodec/vvc/dsp.h" > #include "libavcodec/vvc/dec.h" > +#include "libavcodec/riscv/h26x/h2656dsp.h" > > #define bf(fn, bd, opt) fn##_##bd##_##opt > > @@ -72,8 +73,12 @@ PUT_PIXELS_PROTOTYPES2(8, rvv_256) > c->inter.dst[C][w][idx1][idx2] = a; > \ > } while (0) > \ > > +#define DIR_FUNCS(d, C, opt) > \ > + PEL_FUNC(put_##d, C, 0, 0, ff_h2656_put_##d##_pixels_8_##opt); > \ > + > #define FUNCS(C, opt) > \ > PEL_FUNC(put, C, 0, 0, ff_vvc_put_pixels_8_##opt); > \ > + DIR_FUNCS(uni, C, opt); > \ > > 
void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd) > { > -- > 2.47.0 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
Le maanantaina 28. lokakuuta 2024, 19.08.24 EET uk7b@foxmail.com a écrit : > From: sunyuechi <sunyuechi@iscas.ac.cn> > > k230 > banana_f3 put_uni_pixels_chroma_8_4x4_c: 128.3 ( > 1.00x) 90.5 ( 1.00x) put_uni_pixels_chroma_8_4x4_rvv_i32: > 17.6 ( 7.30x) 17.4 ( 5.18x) put_uni_pixels_chroma_8_8x8_c: > 295.1 ( 1.00x) 163.2 ( 1.00x) > put_uni_pixels_chroma_8_8x8_rvv_i32: 35.8 ( 8.24x) > 27.9 ( 5.84x) put_uni_pixels_chroma_8_16x16_c: 619.3 > ( 1.00x) 267.4 ( 1.00x) put_uni_pixels_chroma_8_16x16_rvv_i32: > 72.8 ( 8.50x) 48.7 ( 5.49x) put_uni_pixels_chroma_8_32x32_c: > 1433.8 ( 1.00x) 538.2 ( 1.00x) > put_uni_pixels_chroma_8_32x32_rvv_i32: 230.3 ( 6.23x) > 236.2 ( 2.28x) put_uni_pixels_chroma_8_64x64_c: 3517.3 > ( 1.00x) 1455.0 ( 1.00x) put_uni_pixels_chroma_8_64x64_rvv_i32: > 813.6 ( 4.32x) 590.2 ( 2.47x) put_uni_pixels_chroma_8_128x128_c: > 10174.6 ( 1.00x) 5798.7 ( 1.00x) > put_uni_pixels_chroma_8_128x128_rvv_i32: 2989.3 ( 3.40x) > 2371.4 ( 2.45x) put_uni_pixels_luma_8_4x4_c: > 128.6 ( 1.00x) 90.5 ( 1.00x) put_uni_pixels_luma_8_4x4_rvv_i32: > 17.3 ( 7.42x) 17.4 ( 5.18x) put_uni_pixels_luma_8_8x8_c: > 295.1 ( 1.00x) 142.4 ( 1.00x) > put_uni_pixels_luma_8_8x8_rvv_i32: 26.6 (11.10x) > 27.9 ( 5.10x) put_uni_pixels_luma_8_16x16_c: 600.6 > ( 1.00x) 277.7 ( 1.00x) put_uni_pixels_luma_8_16x16_rvv_i32: > 82.1 ( 7.32x) 48.7 ( 5.70x) put_uni_pixels_luma_8_32x32_c: > 1406.1 ( 1.00x) 528.0 ( 1.00x) > put_uni_pixels_luma_8_32x32_rvv_i32: 230.3 ( 6.10x) > 131.9 ( 4.00x) put_uni_pixels_luma_8_64x64_c: 4600.6 > ( 1.00x) 1309.2 ( 1.00x) put_uni_pixels_luma_8_64x64_rvv_i32: > 1073.1 ( 4.29x) 382.2 ( 3.43x) put_uni_pixels_luma_8_128x128_c: > 11350.3 ( 1.00x) 3506.9 ( 1.00x) > put_uni_pixels_luma_8_128x128_rvv_i32: 3119.1 ( 3.64x) > 2017.5 ( 1.74x) --- > libavcodec/riscv/h26x/h2656_inter_rvv.S | 53 +++++++++++++++++++++++++ > libavcodec/riscv/h26x/h2656dsp.h | 33 +++++++++++++++ > libavcodec/riscv/vvc/Makefile | 3 +- > libavcodec/riscv/vvc/vvcdsp_init.c | 5 +++ > 4 files changed, 93 
insertions(+), 1 deletion(-) > create mode 100644 libavcodec/riscv/h26x/h2656_inter_rvv.S > create mode 100644 libavcodec/riscv/h26x/h2656dsp.h > > diff --git a/libavcodec/riscv/h26x/h2656_inter_rvv.S > b/libavcodec/riscv/h26x/h2656_inter_rvv.S new file mode 100644 > index 0000000000..6692e33acf > --- /dev/null > +++ b/libavcodec/riscv/h26x/h2656_inter_rvv.S > @@ -0,0 +1,53 @@ > +/* > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences > (ISCAS). + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA + */ > + > +#include "libavcodec/riscv/h26x/asm.S" > + > +.macro put_uni_pixels w, vlen, id > +\id\w\vlen: > +.if \w == 128 && \vlen == 128 > + li t0, \w > + vsetvli zero, t0, e8, m8, ta, ma > +.else > + vsetvlstatic8 \w, \vlen > +.endif > +1: > + vle8.v v0, (a2) > + addi a4, a4, -1 > + vse8.v v0, (a0) > + add a2, a2, a3 > + add a0, a0, a1 > + bnez a4, 1b > + ret > +.endm Is this going to be reused anywhere? it seems the macro is only used once atm. Also is there a reason to use RVV here instead of just unaligned RVI? 
> + > +.macro func_put_uni_pixels vlen > +func ff_h2656_put_uni_pixels_8_rvv_\vlen\(), zve32x, zbb, zba > + lpad 0 > + POW2_JMP_TABLE 4, \vlen > + POW2_J \vlen, 4, a7 > + .irp w,2,4,8,16,32,64,128 > + put_uni_pixels \w, \vlen, 4 > + .endr > +endfunc > +.endm > + > +func_put_uni_pixels 256 > +func_put_uni_pixels 128 > diff --git a/libavcodec/riscv/h26x/h2656dsp.h > b/libavcodec/riscv/h26x/h2656dsp.h new file mode 100644 > index 0000000000..41ba6bc331 > --- /dev/null > +++ b/libavcodec/riscv/h26x/h2656dsp.h > @@ -0,0 +1,33 @@ > +/* > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences > (ISCAS). + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA + */ > + > +#ifndef AVCODEC_RISCV_H26X_H2656DSP_H > +#define AVCODEC_RISCV_H26X_H2656DSP_H > + > +#define H2656_PEL_PROTOTYPE(name, D, opt) \ > +void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t > _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const > int8_t *hf, const int8_t *vf, int width) \ + > +#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \ > + H2656_PEL_PROTOTYPE(fname, bitd, opt); \ > + > +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_256); > +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_128); > + > +#endif > diff --git a/libavcodec/riscv/vvc/Makefile b/libavcodec/riscv/vvc/Makefile > index 582b051579..ec116aebc1 100644 > --- a/libavcodec/riscv/vvc/Makefile > +++ b/libavcodec/riscv/vvc/Makefile > @@ -1,2 +1,3 @@ > OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o > -RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o > +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \ > + riscv/h26x/h2656_inter_rvv.o > diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c > b/libavcodec/riscv/vvc/vvcdsp_init.c index bee892cb7c..9dea70f392 100644 > --- a/libavcodec/riscv/vvc/vvcdsp_init.c > +++ b/libavcodec/riscv/vvc/vvcdsp_init.c > @@ -25,6 +25,7 @@ > #include "libavutil/riscv/cpu.h" > #include "libavcodec/vvc/dsp.h" > #include "libavcodec/vvc/dec.h" > +#include "libavcodec/riscv/h26x/h2656dsp.h" > > #define bf(fn, bd, opt) fn##_##bd##_##opt > > @@ -72,8 +73,12 @@ PUT_PIXELS_PROTOTYPES2(8, rvv_256) > c->inter.dst[C][w][idx1][idx2] = a; > \ } while (0) > \ > > +#define DIR_FUNCS(d, C, opt) > \ + PEL_FUNC(put_##d, C, 0, 0, > ff_h2656_put_##d##_pixels_8_##opt); \ + > #define FUNCS(C, opt) > \ PEL_FUNC(put, C, 0, 0, ff_vvc_put_pixels_8_##opt); > \ + DIR_FUNCS(uni, C, opt); > \ > > void 
ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd) > {
> Is this going to be reused anywhere? it seems the macro is only used once atm. The next patch will use ([PATCH 4/5] lavc/hevc: R-V V pel_uni(pow2)) > Also is there a reason to use RVV here instead of just unaligned RVI? Yes, RVI is enough; I deleted it and resent it. Rémi Denis-Courmont <remi@remlab.net> 于2024年11月9日周六 23:33写道: > Le maanantaina 28. lokakuuta 2024, 19.08.24 EET uk7b@foxmail.com a écrit : > > From: sunyuechi <sunyuechi@iscas.ac.cn> > > > > k230 > > banana_f3 put_uni_pixels_chroma_8_4x4_c: 128.3 ( > > 1.00x) 90.5 ( 1.00x) put_uni_pixels_chroma_8_4x4_rvv_i32: > > > 17.6 ( 7.30x) 17.4 ( 5.18x) put_uni_pixels_chroma_8_8x8_c: > > > 295.1 ( 1.00x) 163.2 ( 1.00x) > > put_uni_pixels_chroma_8_8x8_rvv_i32: 35.8 ( 8.24x) > > 27.9 ( 5.84x) put_uni_pixels_chroma_8_16x16_c: > 619.3 > > ( 1.00x) 267.4 ( 1.00x) put_uni_pixels_chroma_8_16x16_rvv_i32: > > > 72.8 ( 8.50x) 48.7 ( 5.49x) put_uni_pixels_chroma_8_32x32_c: > > > 1433.8 ( 1.00x) 538.2 ( 1.00x) > > put_uni_pixels_chroma_8_32x32_rvv_i32: 230.3 ( 6.23x) > > 236.2 ( 2.28x) put_uni_pixels_chroma_8_64x64_c: > 3517.3 > > ( 1.00x) 1455.0 ( 1.00x) put_uni_pixels_chroma_8_64x64_rvv_i32: > > > 813.6 ( 4.32x) 590.2 ( 2.47x) > put_uni_pixels_chroma_8_128x128_c: > > 10174.6 ( 1.00x) 5798.7 ( 1.00x) > > put_uni_pixels_chroma_8_128x128_rvv_i32: 2989.3 ( 3.40x) > > 2371.4 ( 2.45x) put_uni_pixels_luma_8_4x4_c: > > 128.6 ( 1.00x) 90.5 ( 1.00x) put_uni_pixels_luma_8_4x4_rvv_i32: > > > 17.3 ( 7.42x) 17.4 ( 5.18x) put_uni_pixels_luma_8_8x8_c: > > > 295.1 ( 1.00x) 142.4 ( 1.00x) > > put_uni_pixels_luma_8_8x8_rvv_i32: 26.6 (11.10x) > > 27.9 ( 5.10x) put_uni_pixels_luma_8_16x16_c: > 600.6 > > ( 1.00x) 277.7 ( 1.00x) put_uni_pixels_luma_8_16x16_rvv_i32: > > > 82.1 ( 7.32x) 48.7 ( 5.70x) put_uni_pixels_luma_8_32x32_c: > > > 1406.1 ( 1.00x) 528.0 ( 1.00x) > > put_uni_pixels_luma_8_32x32_rvv_i32: 230.3 ( 6.10x) > > 131.9 ( 4.00x) put_uni_pixels_luma_8_64x64_c: > 4600.6 > > ( 1.00x) 1309.2 ( 1.00x) put_uni_pixels_luma_8_64x64_rvv_i32: > 
> > 1073.1 ( 4.29x) 382.2 ( 3.43x) > put_uni_pixels_luma_8_128x128_c: > > 11350.3 ( 1.00x) 3506.9 ( 1.00x) > > put_uni_pixels_luma_8_128x128_rvv_i32: 3119.1 ( 3.64x) > > 2017.5 ( 1.74x) --- > > libavcodec/riscv/h26x/h2656_inter_rvv.S | 53 +++++++++++++++++++++++++ > > libavcodec/riscv/h26x/h2656dsp.h | 33 +++++++++++++++ > > libavcodec/riscv/vvc/Makefile | 3 +- > > libavcodec/riscv/vvc/vvcdsp_init.c | 5 +++ > > 4 files changed, 93 insertions(+), 1 deletion(-) > > create mode 100644 libavcodec/riscv/h26x/h2656_inter_rvv.S > > create mode 100644 libavcodec/riscv/h26x/h2656dsp.h > > > > diff --git a/libavcodec/riscv/h26x/h2656_inter_rvv.S > > b/libavcodec/riscv/h26x/h2656_inter_rvv.S new file mode 100644 > > index 0000000000..6692e33acf > > --- /dev/null > > +++ b/libavcodec/riscv/h26x/h2656_inter_rvv.S > > @@ -0,0 +1,53 @@ > > +/* > > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences > > (ISCAS). + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. 
> > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 > > USA + */ > > + > > +#include "libavcodec/riscv/h26x/asm.S" > > + > > +.macro put_uni_pixels w, vlen, id > > +\id\w\vlen: > > +.if \w == 128 && \vlen == 128 > > + li t0, \w > > + vsetvli zero, t0, e8, m8, ta, ma > > +.else > > + vsetvlstatic8 \w, \vlen > > +.endif > > +1: > > + vle8.v v0, (a2) > > + addi a4, a4, -1 > > + vse8.v v0, (a0) > > + add a2, a2, a3 > > + add a0, a0, a1 > > + bnez a4, 1b > > + ret > > +.endm > > Is this going to be reused anywhere? it seems the macro is only used once > atm. > > Also is there a reason to use RVV here instead of just unaligned RVI? > > > + > > +.macro func_put_uni_pixels vlen > > +func ff_h2656_put_uni_pixels_8_rvv_\vlen\(), zve32x, zbb, zba > > + lpad 0 > > + POW2_JMP_TABLE 4, \vlen > > + POW2_J \vlen, 4, a7 > > + .irp w,2,4,8,16,32,64,128 > > + put_uni_pixels \w, \vlen, 4 > > + .endr > > +endfunc > > +.endm > > + > > +func_put_uni_pixels 256 > > +func_put_uni_pixels 128 > > diff --git a/libavcodec/riscv/h26x/h2656dsp.h > > b/libavcodec/riscv/h26x/h2656dsp.h new file mode 100644 > > index 0000000000..41ba6bc331 > > --- /dev/null > > +++ b/libavcodec/riscv/h26x/h2656dsp.h > > @@ -0,0 +1,33 @@ > > +/* > > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences > > (ISCAS). + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. 
> > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 > > USA + */ > > + > > +#ifndef AVCODEC_RISCV_H26X_H2656DSP_H > > +#define AVCODEC_RISCV_H26X_H2656DSP_H > > + > > +#define H2656_PEL_PROTOTYPE(name, D, opt) \ > > +void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, > ptrdiff_t > > _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const > > int8_t *hf, const int8_t *vf, int width) \ + > > +#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \ > > + H2656_PEL_PROTOTYPE(fname, bitd, opt); \ > > + > > +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_256); > > +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_128); > > + > > +#endif > > diff --git a/libavcodec/riscv/vvc/Makefile > b/libavcodec/riscv/vvc/Makefile > > index 582b051579..ec116aebc1 100644 > > --- a/libavcodec/riscv/vvc/Makefile > > +++ b/libavcodec/riscv/vvc/Makefile > > @@ -1,2 +1,3 @@ > > OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o > > -RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o > > +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \ > > + riscv/h26x/h2656_inter_rvv.o > > diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c > > b/libavcodec/riscv/vvc/vvcdsp_init.c index bee892cb7c..9dea70f392 100644 > > --- a/libavcodec/riscv/vvc/vvcdsp_init.c > > +++ b/libavcodec/riscv/vvc/vvcdsp_init.c > > @@ -25,6 +25,7 @@ > > #include "libavutil/riscv/cpu.h" > > #include "libavcodec/vvc/dsp.h" > > #include "libavcodec/vvc/dec.h" > > +#include "libavcodec/riscv/h26x/h2656dsp.h" > > > > #define bf(fn, bd, opt) fn##_##bd##_##opt 
> > > > @@ -72,8 +73,12 @@ PUT_PIXELS_PROTOTYPES2(8, rvv_256) > > c->inter.dst[C][w][idx1][idx2] = a; > > > \ } while (0) > > > \ > > > > +#define DIR_FUNCS(d, C, opt) > > > \ + PEL_FUNC(put_##d, C, 0, 0, > > ff_h2656_put_##d##_pixels_8_##opt); \ + > > #define FUNCS(C, opt) > > > \ PEL_FUNC(put, C, 0, 0, ff_vvc_put_pixels_8_##opt); > > > \ + DIR_FUNCS(uni, C, opt); > > > \ > > > > void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd) > > { > > > -- > レミ・デニ-クールモン > http://www.remlab.net/ > > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
diff --git a/libavcodec/riscv/h26x/h2656_inter_rvv.S b/libavcodec/riscv/h26x/h2656_inter_rvv.S new file mode 100644 index 0000000000..6692e33acf --- /dev/null +++ b/libavcodec/riscv/h26x/h2656_inter_rvv.S @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS). + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/riscv/h26x/asm.S" + +.macro put_uni_pixels w, vlen, id +\id\w\vlen: +.if \w == 128 && \vlen == 128 + li t0, \w + vsetvli zero, t0, e8, m8, ta, ma +.else + vsetvlstatic8 \w, \vlen +.endif +1: + vle8.v v0, (a2) + addi a4, a4, -1 + vse8.v v0, (a0) + add a2, a2, a3 + add a0, a0, a1 + bnez a4, 1b + ret +.endm + +.macro func_put_uni_pixels vlen +func ff_h2656_put_uni_pixels_8_rvv_\vlen\(), zve32x, zbb, zba + lpad 0 + POW2_JMP_TABLE 4, \vlen + POW2_J \vlen, 4, a7 + .irp w,2,4,8,16,32,64,128 + put_uni_pixels \w, \vlen, 4 + .endr +endfunc +.endm + +func_put_uni_pixels 256 +func_put_uni_pixels 128 diff --git a/libavcodec/riscv/h26x/h2656dsp.h b/libavcodec/riscv/h26x/h2656dsp.h new file mode 100644 index 0000000000..41ba6bc331 --- /dev/null +++ b/libavcodec/riscv/h26x/h2656dsp.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS). 
+ * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_RISCV_H26X_H2656DSP_H +#define AVCODEC_RISCV_H26X_H2656DSP_H + +#define H2656_PEL_PROTOTYPE(name, D, opt) \ +void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width) \ + +#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \ + H2656_PEL_PROTOTYPE(fname, bitd, opt); \ + +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_256); +H2656_MC_8TAP_PROTOTYPES(pixels , 8, rvv_128); + +#endif diff --git a/libavcodec/riscv/vvc/Makefile b/libavcodec/riscv/vvc/Makefile index 582b051579..ec116aebc1 100644 --- a/libavcodec/riscv/vvc/Makefile +++ b/libavcodec/riscv/vvc/Makefile @@ -1,2 +1,3 @@ OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o -RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \ + riscv/h26x/h2656_inter_rvv.o diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c b/libavcodec/riscv/vvc/vvcdsp_init.c index bee892cb7c..9dea70f392 100644 --- a/libavcodec/riscv/vvc/vvcdsp_init.c +++ b/libavcodec/riscv/vvc/vvcdsp_init.c @@ -25,6 +25,7 @@ #include "libavutil/riscv/cpu.h" #include "libavcodec/vvc/dsp.h" 
#include "libavcodec/vvc/dec.h" +#include "libavcodec/riscv/h26x/h2656dsp.h" #define bf(fn, bd, opt) fn##_##bd##_##opt @@ -72,8 +73,12 @@ PUT_PIXELS_PROTOTYPES2(8, rvv_256) c->inter.dst[C][w][idx1][idx2] = a; \ } while (0) \ +#define DIR_FUNCS(d, C, opt) \ + PEL_FUNC(put_##d, C, 0, 0, ff_h2656_put_##d##_pixels_8_##opt); \ + #define FUNCS(C, opt) \ PEL_FUNC(put, C, 0, 0, ff_vvc_put_pixels_8_##opt); \ + DIR_FUNCS(uni, C, opt); \ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd) {
From: sunyuechi <sunyuechi@iscas.ac.cn>

```
                                                 k230               banana_f3
put_uni_pixels_chroma_8_4x4_c:                  128.3 ( 1.00x)      90.5 ( 1.00x)
put_uni_pixels_chroma_8_4x4_rvv_i32:             17.6 ( 7.30x)      17.4 ( 5.18x)
put_uni_pixels_chroma_8_8x8_c:                  295.1 ( 1.00x)     163.2 ( 1.00x)
put_uni_pixels_chroma_8_8x8_rvv_i32:             35.8 ( 8.24x)      27.9 ( 5.84x)
put_uni_pixels_chroma_8_16x16_c:                619.3 ( 1.00x)     267.4 ( 1.00x)
put_uni_pixels_chroma_8_16x16_rvv_i32:           72.8 ( 8.50x)      48.7 ( 5.49x)
put_uni_pixels_chroma_8_32x32_c:               1433.8 ( 1.00x)     538.2 ( 1.00x)
put_uni_pixels_chroma_8_32x32_rvv_i32:          230.3 ( 6.23x)     236.2 ( 2.28x)
put_uni_pixels_chroma_8_64x64_c:               3517.3 ( 1.00x)    1455.0 ( 1.00x)
put_uni_pixels_chroma_8_64x64_rvv_i32:          813.6 ( 4.32x)     590.2 ( 2.47x)
put_uni_pixels_chroma_8_128x128_c:            10174.6 ( 1.00x)    5798.7 ( 1.00x)
put_uni_pixels_chroma_8_128x128_rvv_i32:       2989.3 ( 3.40x)    2371.4 ( 2.45x)
put_uni_pixels_luma_8_4x4_c:                    128.6 ( 1.00x)      90.5 ( 1.00x)
put_uni_pixels_luma_8_4x4_rvv_i32:               17.3 ( 7.42x)      17.4 ( 5.18x)
put_uni_pixels_luma_8_8x8_c:                    295.1 ( 1.00x)     142.4 ( 1.00x)
put_uni_pixels_luma_8_8x8_rvv_i32:               26.6 (11.10x)      27.9 ( 5.10x)
put_uni_pixels_luma_8_16x16_c:                  600.6 ( 1.00x)     277.7 ( 1.00x)
put_uni_pixels_luma_8_16x16_rvv_i32:             82.1 ( 7.32x)      48.7 ( 5.70x)
put_uni_pixels_luma_8_32x32_c:                 1406.1 ( 1.00x)     528.0 ( 1.00x)
put_uni_pixels_luma_8_32x32_rvv_i32:            230.3 ( 6.10x)     131.9 ( 4.00x)
put_uni_pixels_luma_8_64x64_c:                 4600.6 ( 1.00x)    1309.2 ( 1.00x)
put_uni_pixels_luma_8_64x64_rvv_i32:           1073.1 ( 4.29x)     382.2 ( 3.43x)
put_uni_pixels_luma_8_128x128_c:              11350.3 ( 1.00x)    3506.9 ( 1.00x)
put_uni_pixels_luma_8_128x128_rvv_i32:         3119.1 ( 3.64x)    2017.5 ( 1.74x)
---
 libavcodec/riscv/h26x/h2656_inter_rvv.S | 53 +++++++++++++++++++++++++
 libavcodec/riscv/h26x/h2656dsp.h        | 33 +++++++++++++++
 libavcodec/riscv/vvc/Makefile           |  3 +-
 libavcodec/riscv/vvc/vvcdsp_init.c      |  5 +++
 4 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/h26x/h2656_inter_rvv.S
 create mode 100644 libavcodec/riscv/h26x/h2656dsp.h
```