diff mbox series

[FFmpeg-devel,1/6] lavc/pixblockdsp: rename unaligned R-V V functions

Message ID 20231027192540.27373-1-remi@remlab.net
State Accepted
Commit 722765687b666fe630c5dbc82f39a7035a97edc0
Headers show
Series [FFmpeg-devel,1/6] lavc/pixblockdsp: rename unaligned R-V V functions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Oct. 27, 2023, 7:25 p.m. UTC
---
 libavcodec/riscv/pixblockdsp_init.c | 26 +++++++++++++++-----------
 libavcodec/riscv/pixblockdsp_rvv.S  |  6 +++---
 2 files changed, 18 insertions(+), 14 deletions(-)

Comments

Rémi Denis-Courmont Oct. 28, 2023, 8:22 a.m. UTC | #1
P.S.:

It took some additional effort to get benchmarks with proto-RVV,
but here they are:

idctdsp.add_pixels_clamped_c: 259.5
idctdsp.add_pixels_clamped_rvv_i64: 90.5
idctdsp.put_pixels_clamped_c: 186.5
idctdsp.put_pixels_clamped_rvv_i64: 65.5
idctdsp.put_signed_pixels_clamped_c: 209.5
idctdsp.put_signed_pixels_clamped_rvv_i64: 61.5

diff_pixels_c: 177.0
diff_pixels_rvv_i64: 72.0
get_pixels_c: 114.2
get_pixels_rvi: 96.2
get_pixels_rvv_i64: 62.0

The old code can't run on proto-RVV, so a before/after comparison for this
patchset is not possible.
diff mbox series

Patch

diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c
index aa39a8a665..8f24281217 100644
--- a/libavcodec/riscv/pixblockdsp_init.c
+++ b/libavcodec/riscv/pixblockdsp_init.c
@@ -32,12 +32,12 @@  void ff_get_pixels_8_rvi(int16_t *block, const uint8_t *pixels,
 void ff_get_pixels_16_rvi(int16_t *block, const uint8_t *pixels,
                           ptrdiff_t stride);
 
-void ff_get_pixels_8_rvv(int16_t *block, const uint8_t *pixels,
-                         ptrdiff_t stride);
-void ff_get_pixels_16_rvv(int16_t *block, const uint8_t *pixels,
-                          ptrdiff_t stride);
-void ff_diff_pixels_rvv(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                        ptrdiff_t stride);
+void ff_get_pixels_unaligned_8_rvv(int16_t *block, const uint8_t *pixels,
+                                   ptrdiff_t stride);
+void ff_get_pixels_unaligned_16_rvv(int16_t *block, const uint8_t *pixels,
+                                    ptrdiff_t stride);
+void ff_diff_pixels_unaligned_rvv(int16_t *block, const uint8_t *s1,
+                                  const uint8_t *s2, ptrdiff_t stride);
 
 av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
                                        AVCodecContext *avctx,
@@ -54,12 +54,16 @@  av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
 
 #if HAVE_RVV
     if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16) {
-        if (high_bit_depth)
-            c->get_pixels_unaligned = c->get_pixels = ff_get_pixels_16_rvv;
-        else
-            c->get_pixels_unaligned = c->get_pixels = ff_get_pixels_8_rvv;
+        if (high_bit_depth) {
+            c->get_pixels = ff_get_pixels_unaligned_16_rvv;
+            c->get_pixels_unaligned = ff_get_pixels_unaligned_16_rvv;
+        } else {
+            c->get_pixels = ff_get_pixels_unaligned_8_rvv;
+            c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv;
+        }
 
-        c->diff_pixels_unaligned = c->diff_pixels = ff_diff_pixels_rvv;
+        c->diff_pixels = ff_diff_pixels_unaligned_rvv;
+        c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S
index 1a364e6dab..e3a2fcc6ef 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -20,7 +20,7 @@ 
 
 #include "libavutil/riscv/asm.S"
 
-func ff_get_pixels_8_rvv, zve32x
+func ff_get_pixels_unaligned_8_rvv, zve32x
         vsetivli     zero, 8, e8, mf2, ta, ma
         vlsseg8e8.v  v16, (a1), a2
         vwcvtu.x.x.v v8, v16
@@ -35,14 +35,14 @@  func ff_get_pixels_8_rvv, zve32x
         ret
 endfunc
 
-func ff_get_pixels_16_rvv, zve32x
+func ff_get_pixels_unaligned_16_rvv, zve32x
         vsetivli     zero, 8, e16, m1, ta, ma
         vlsseg8e16.v v0, (a1), a2
         vsseg8e16.v  v0, (a0)
         ret
 endfunc
 
-func ff_diff_pixels_rvv, zve32x
+func ff_diff_pixels_unaligned_rvv, zve32x
         vsetivli    zero, 8, e8, mf2, ta, ma
         vlsseg8e8.v v16, (a1), a3
         vlsseg8e8.v v24, (a2), a3