diff mbox series

[FFmpeg-devel,2/2] lavc/bswapdsp: purge RISC-V V bswap32

Message ID 20230716151352.121105-2-remi@remlab.net
State Accepted
Commit 61e5ca4ded9c5fe697fdffe69bb041d5852556ba
Headers show
Series [FFmpeg-devel,1/2] lavc/bswapdsp: rewrite RISC-V V bswap16 | expand

Checks

Context Check Description
yinshiyou/configure_loongarch64 warning Failed to apply patch
andriy/configure_x86 warning Failed to apply patch

Commit Message

Rémi Denis-Courmont July 16, 2023, 3:13 p.m. UTC
This cannot beat the Zbb implementation, and it is unlikely that any
realistic CPU design would support V but not Zbb. The best loop rewrite
that I could come up with (4 shifts, 2 ANDs and 3 ORs per element) is
still ~40% slower than Zbb.

A proper, faster vector implementation should be feasible with the
cryptographic vector extensions, but that is a story for another time.
---
 libavcodec/riscv/bswapdsp_init.c |  5 +----
 libavcodec/riscv/bswapdsp_rvv.S  | 23 -----------------------
 2 files changed, 1 insertion(+), 27 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/bswapdsp_init.c b/libavcodec/riscv/bswapdsp_init.c
index 6ad63e3805..ed666c9b3a 100644
--- a/libavcodec/riscv/bswapdsp_init.c
+++ b/libavcodec/riscv/bswapdsp_init.c
@@ -26,7 +26,6 @@ 
 #include "libavcodec/bswapdsp.h"
 
 void ff_bswap32_buf_rvb(uint32_t *dst, const uint32_t *src, int len);
-void ff_bswap32_buf_rvv(uint32_t *dst, const uint32_t *src, int len);
 void ff_bswap16_buf_rvv(uint16_t *dst, const uint16_t *src, int len);
 
 av_cold void ff_bswapdsp_init_riscv(BswapDSPContext *c)
@@ -39,10 +38,8 @@  av_cold void ff_bswapdsp_init_riscv(BswapDSPContext *c)
             c->bswap_buf = ff_bswap32_buf_rvb;
 #endif
 #if HAVE_RVV
-        if (flags & AV_CPU_FLAG_RVV_I32) {
-            c->bswap_buf = ff_bswap32_buf_rvv;
+        if (flags & AV_CPU_FLAG_RVV_I32)
             c->bswap16_buf = ff_bswap16_buf_rvv;
-        }
 #endif
     }
 }
diff --git a/libavcodec/riscv/bswapdsp_rvv.S b/libavcodec/riscv/bswapdsp_rvv.S
index 8b585ec5c9..b37fe26255 100644
--- a/libavcodec/riscv/bswapdsp_rvv.S
+++ b/libavcodec/riscv/bswapdsp_rvv.S
@@ -21,29 +21,6 @@ 
 #include "config.h"
 #include "libavutil/riscv/asm.S"
 
-func ff_bswap32_buf_rvv, zve32x
-        li      t4, 4
-        addi    t1, a0, 1
-        addi    t2, a0, 2
-        addi    t3, a0, 3
-1:
-        vsetvli    t0, a2, e8, m1, ta, ma
-        vlseg4e8.v v8, (a1)
-        sub        a2, a2, t0
-        sh2add     a1, t0, a1
-        vsse8.v    v8, (t3), t4
-        sh2add     t3, t0, t3
-        vsse8.v    v9, (t2), t4
-        sh2add     t2, t0, t2
-        vsse8.v    v10, (t1), t4
-        sh2add     t1, t0, t1
-        vsse8.v    v11, (a0), t4
-        sh2add     a0, t0, a0
-        bnez       a2, 1b
-
-        ret
-endfunc
-
 func ff_bswap16_buf_rvv, zve32x
 1:
         vsetvli t0, a2, e16, m8, ta, ma