diff mbox series

[FFmpeg-devel,4/7] lavc/vp9dsp: R-V V mc bilin v

Message ID CAEa-L+tLwkOZ3HGroeABHZwo3Fh-=z__AvYP41TT6UWRQ49fBA@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/7] lavc/vp9dsp: R-V mc copy_avg | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

flow gg March 22, 2024, 6:04 a.m. UTC

diff mbox series

Patch

From eb004dcf5cc6a3c379cb6cb7b8592afa65626c5c Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Thu, 21 Mar 2024 23:00:19 +0800
Subject: [PATCH 4/7] lavc/vp9dsp: R-V V mc bilin v

C908:
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8v_8bpp_c: 20.7
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16v_8bpp_c: 82.2
vp9_avg_bilin_16v_8bpp_rvv_i64: 9.0
vp9_avg_bilin_32v_8bpp_c: 342.5
vp9_avg_bilin_32v_8bpp_rvv_i64: 27.0
vp9_avg_bilin_64v_8bpp_c: 1319.2
vp9_avg_bilin_64v_8bpp_rvv_i64: 93.2
vp9_put_bilin_4v_8bpp_c: 4.7
vp9_put_bilin_4v_8bpp_rvv_i64: 1.7
vp9_put_bilin_8v_8bpp_c: 17.7
vp9_put_bilin_8v_8bpp_rvv_i64: 3.2
vp9_put_bilin_16v_8bpp_c: 69.2
vp9_put_bilin_16v_8bpp_rvv_i64: 7.5
vp9_put_bilin_32v_8bpp_c: 274.2
vp9_put_bilin_32v_8bpp_rvv_i64: 23.2
vp9_put_bilin_64v_8bpp_c: 1109.5
vp9_put_bilin_64v_8bpp_rvv_i64: 82.2
---
 libavcodec/riscv/vp9_mc_rvv.S | 49 +++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index eacc174bc4..9458a2e82b 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -117,6 +117,49 @@ 
         ret
 .endm
 
+.macro bilin_v len type                                  /* Body of a vertical bilinear filter over rows of len bytes; type is put or avg (avg also averages with the bytes already at a0). Inputs read: a0, a1, a2, a3, a4, a6. Scratch written: t0 (len 32/64 only), t1, t2, t4, vector groups at v0/v8/v16/v24, vl/vtype, and vxrm for avg. */
+.ifc \type,avg
+        csrwi           vxrm, 0                          /* fixed-point rounding mode 0 (round-to-nearest-up), consumed by vaaddu below */
+.endif
+.ifc \len,4
+        vsetivli        zero, 4, e8, mf4, ta, ma         /* vl = 4 bytes; the e16 intermediates below then occupy LMUL = 1/2 */
+.elseif \len == 8
+        vsetivli        zero, 8, e8, mf2, ta, ma         /* vl = 8 bytes */
+.elseif \len == 16
+        vsetivli        zero, 16, e8, m1, ta, ma         /* vl = 16 bytes */
+.elseif \len == 32
+        li              t0, 32                           /* 32 does not fit the 5-bit vsetivli immediate, so pass the length in a register */
+        vsetvli         zero, t0, e8, m2, ta, ma         /* vl = 32 bytes */
+.elseif \len == 64
+        li              t0, 64                           /* same reason as the len 32 case */
+        vsetvli         zero, t0, e8, m4, ta, ma         /* vl = 64 bytes; e16 intermediates use the full m8 group v24-v31 */
+.endif
+        li              t4, 8                            /* rounding bias: half of the 1 << 4 divisor applied by vnsra below */
+        neg             t1, a6                           /* t1 = -a6, so the multiply-accumulate below subtracts a6 * current row; a6 is presumably the 4-bit vertical fraction (TODO confirm against caller) */
+1:                                                       /* one output row per iteration */
+        add             t2, a2, a3                       /* t2 = address of the row one a3 step after a2 */
+        addi            a4, a4, -1                       /* a4 = rows remaining; decremented early, tested by bnez at the bottom */
+        vle8.v          v0, (a2)                         /* v0 = current source row */
+        vle8.v          v8, (t2)                         /* v8 = next source row */
+.ifc \type,avg
+        vle8.v          v16, (a0)                        /* v16 = bytes currently in the destination row */
+.endif
+        vwmulu.vx       v24, v8, a6                      /* v24 (16-bit) = a6 * next row, unsigned widening multiply */
+        vwmaccsu.vx     v24, t1, v0                      /* v24 += (-a6) * current row (signed scalar, unsigned vector) -> a6 * (next - current) */
+        vwadd.wx        v24, v24, t4                     /* add the rounding bias of 8 */
+        vnsra.wi        v24, v24, 4                      /* arithmetic shift right by 4 and narrow to the low 8 bits of each element */
+        vadd.vv         v0, v24, v0                      /* v0 = current row + scaled difference (8-bit wrap-around add) */
+.ifc \type,avg
+        vaaddu.vv       v0, v0, v16                      /* v0 = (v0 + v16 + 1) >> 1 with the vxrm = 0 set at macro entry */
+.endif
+        vse8.v          v0, (a0)                         /* store the finished row at a0 */
+        add             a2, a2, a3                       /* advance the source pointer by a3 */
+        add             a0, a0, a1                       /* advance the destination pointer by a1 */
+        bnez            a4, 1b                           /* loop until the row counter reaches zero; a4 = 0 on entry would wrap, so callers presumably pass a4 >= 1 (TODO confirm) */
+
+        ret                                              /* the macro itself emits the function return */
+.endm
+
 .irp len 64, 32, 16
 func ff_copy\len\()_rvv, zve32x
         copy_avg \len copy
@@ -358,6 +401,12 @@  endfunc
 func ff_avg_bilin_\len\()h_rvv, zve32x
         bilin_h \len avg
 endfunc
+func ff_put_bilin_\len\()v_rvv, zve32x                   /* put variant of the vertical bilinear filter for the current len; zve32x is passed to the func macro, presumably to enable that extension (see its definition) */
+        bilin_v \len put                                 /* whole body, including the final ret, comes from the bilin_v macro */
+endfunc
+func ff_avg_bilin_\len\()v_rvv, zve32x                   /* avg variant: same filter, result is averaged with the bytes already in the destination; zve32x is passed to the func macro, presumably to enable that extension (see its definition) */
+        bilin_v \len avg                                 /* whole body, including the final ret, comes from the bilin_v macro */
+endfunc
 
 .irp name regular sharp smooth
         .irp do put avg
-- 
2.44.0