diff mbox series

[FFmpeg-devel,5/6] lavc/takdsp: R-V V decorrelate_sr

Message ID CAEa-L+vsyJdqTBu444oL_KiYN03jtH2c0Kt0axwoJB1Q+cgNdA@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/6] checkasm/takdsp: add decorrelate_ls test | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

flow gg Dec. 18, 2023, 3:16 p.m. UTC
C908:
decorrelate_sr_c: 95.5
decorrelate_sr_rvv_i32: 28.2
diff mbox series

Patch

From fa1a84337a7cd2a62c26a9d5f8d707a97e917f77 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Mon, 18 Dec 2023 22:52:20 +0800
Subject: [PATCH 5/6] lavc/takdsp: R-V V decorrelate_sr

C908:
decorrelate_sr_c: 95.5
decorrelate_sr_rvv_i32: 28.2
---
 libavcodec/riscv/takdsp_init.c |  2 ++
 libavcodec/riscv/takdsp_rvv.S  | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/libavcodec/riscv/takdsp_init.c b/libavcodec/riscv/takdsp_init.c
index fcf0c5f37b..0b4ec18086 100644
--- a/libavcodec/riscv/takdsp_init.c
+++ b/libavcodec/riscv/takdsp_init.c
@@ -26,6 +26,7 @@ 
 #include "libavcodec/takdsp.h"
 
 void ff_decorrelate_ls_rvv(int32_t *p1, int32_t *p2, int length);
+void ff_decorrelate_sr_rvv(int32_t *p1, int32_t *p2, int length);
 
 av_cold void ff_takdsp_init_riscv(TAKDSPContext *dsp)
 {
@@ -34,6 +35,7 @@  av_cold void ff_takdsp_init_riscv(TAKDSPContext *dsp)
 
     if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
         dsp->decorrelate_ls = ff_decorrelate_ls_rvv;
+        dsp->decorrelate_sr = ff_decorrelate_sr_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/takdsp_rvv.S b/libavcodec/riscv/takdsp_rvv.S
index 00e8e38fdf..65c79e1aa9 100644
--- a/libavcodec/riscv/takdsp_rvv.S
+++ b/libavcodec/riscv/takdsp_rvv.S
@@ -33,3 +33,17 @@  func ff_decorrelate_ls_rvv, zve32x
         bnez a2, 1b
         ret
 endfunc
+
+func ff_decorrelate_sr_rvv, zve32x  // p1[i] = p2[i] - p1[i] for i in [0, length); a0 = p1, a1 = p2, a2 = length
+1:  // each pass handles t0 elements, as granted by vsetvli
+        vsetvli  t0, a2, e32, m8, ta, ma  // t0 = vl for this pass: 32-bit elements, LMUL=8 register groups
+        vle32.v  v0, (a0)  // v0..v7 <- t0 words from p1
+        sub      a2, a2, t0  // a2 = elements still to process after this pass
+        vle32.v  v8, (a1)  // v8..v15 <- t0 words from p2
+        sh2add   a1, t0, a1  // a1 += t0 * 4: advance p2 past the words just loaded
+        vsub.vv  v16, v8, v0  // v16 = v8 - v0, i.e. p2[i] - p1[i]
+        vse32.v  v16, (a0)  // write the differences back over p1
+        sh2add   a0, t0, a0  // a0 += t0 * 4: advance p1 past the words just stored
+        bnez     a2, 1b  // repeat while elements remain
+        ret
+endfunc
-- 
2.43.0