diff mbox series

[FFmpeg-devel,6/7] lavc/vp9dsp: R-V V mc bilin hv

Message ID CAEa-L+ufSN=dKKUsEZ9+WKpn+YB1jasm3g46jTj2DiGKXAYjMA@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/7] lavc/vp9dsp: R-V mc copy_avg | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

flow gg March 22, 2024, 6:05 a.m. UTC

diff mbox series

Patch

From 5df2835fd182378b78530e001669c65f3638946d Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Thu, 21 Mar 2024 23:14:10 +0800
Subject: [PATCH 6/7] lavc/vp9dsp: R-V V mc bilin hv

C908:
vp9_avg_bilin_4hv_8bpp_c: 10.7
vp9_avg_bilin_4hv_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 8.2
vp9_avg_bilin_16hv_8bpp_c: 147.2
vp9_avg_bilin_16hv_8bpp_rvv_i64: 32.2
vp9_avg_bilin_32hv_8bpp_c: 590.7
vp9_avg_bilin_32hv_8bpp_rvv_i64: 47.5
vp9_avg_bilin_64hv_8bpp_c: 2323.7
vp9_avg_bilin_64hv_8bpp_rvv_i64: 153.5
vp9_put_bilin_4hv_8bpp_c: 10.0
vp9_put_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_put_bilin_8hv_8bpp_c: 35.2
vp9_put_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_put_bilin_16hv_8bpp_c: 133.7
vp9_put_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_put_bilin_32hv_8bpp_c: 521.7
vp9_put_bilin_32hv_8bpp_rvv_i64: 43.0
vp9_put_bilin_64hv_8bpp_c: 2098.0
vp9_put_bilin_64hv_8bpp_rvv_i64: 144.5
---
 libavcodec/riscv/vp9_mc_rvv.S | 37 +++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 2d4b56516f..1fad17266d 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -160,6 +160,37 @@ 
         ret
 .endm
 
+.macro bilin_hv len type                 // two-stage bilinear filter: rows from bilin_h_load are blended pairwise; type is put or avg
+.ifc \type,avg
+        csrwi           vxrm, 0          // avg only: vxrm = 0 (round-to-nearest-up) for the vaaddu below
+.endif
+        neg             t1, a5           // t1 = -a5; unused in this body, presumably read by bilin_h_load (confirm)
+        neg             t2, a6           // t2 = -a6, the signed scalar for vwmaccsu below
+        li              t4, 8            // rounding bias added before the shift right by 4
+        li              t5, 1            // unused in this body, presumably read by bilin_h_load (confirm)
+        bilin_h_load    v24, \len, put   // v24 = first row from the helper (defined outside this hunk)
+        add             a2, a2, a3       // a2 += a3; presumably source pointer += source stride (confirm)
+1:
+        addi            a4, a4, -1       // a4 counts the rows still to produce
+        bilin_h_load    v4, \len, put    // v4 = next row from the helper
+        vwmulu.vx       v16, v4, a6      // v16 (double width) = v4 * a6, unsigned
+        vwmaccsu.vx     v16, t2, v24     // v16 += t2 * v24, so v16 = a6 * (v4 - v24)
+        vwadd.wx        v16, v16, t4     // v16 += 8
+        vnsra.wi        v16, v16, 4      // arithmetic shift right by 4, narrowing back to single width
+        vadd.vv         v0, v16, v24     // v0 = v24 + ((a6 * (v4 - v24) + 8) >> 4)
+.ifc \type,avg
+        vle8.v          v16, (a0)        // avg only: load the bytes currently stored at a0
+        vaaddu.vv       v0, v0, v16      // avg only: unsigned averaging add of new row and existing bytes
+.endif
+        vse8.v          v0, (a0)         // store the finished row at a0
+        vmv.v.v         v24, v4          // keep this row as the previous row for the next iteration
+        add             a2, a2, a3       // a2 += a3 for the next helper load
+        add             a0, a0, a1       // a0 += a1: step the store pointer to the next row
+        bnez            a4, 1b           // repeat until a4 reaches zero
+
+        ret
+.endm
+
 .irp len 64, 32, 16
 func ff_copy\len\()_rvv, zve32x
         copy_avg \len copy
@@ -437,6 +468,12 @@  endfunc
 func ff_avg_bilin_\len\()v_rvv, zve32x
         bilin_v \len avg
 endfunc
+func ff_put_bilin_\len\()hv_rvv, zve32x  // \len comes from an enclosing .irp or .macro that is not visible in this hunk
+        bilin_hv \len put                // put variant: the avg-only .ifc sections of bilin_hv are not assembled
+endfunc
+func ff_avg_bilin_\len\()hv_rvv, zve32x  // \len comes from an enclosing .irp or .macro that is not visible in this hunk
+        bilin_hv \len avg                // avg variant: bilin_hv also sets vxrm and averages with the bytes at a0
+endfunc
 
 .irp name regular sharp smooth
         .irp do put avg
-- 
2.44.0