diff mbox series

[FFmpeg-devel,2/7] lavc/vp9dsp: R-V V mc bilin h

Message ID CAEa-L+vK=BhUrxz=AR4QHC0Qu6_ZXNKPYG2G4hhc6=AhyKUmmw@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/7] lavc/vp9dsp: R-V mc copy_avg | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

flow gg March 22, 2024, 6:04 a.m. UTC

diff mbox series

Patch

From 7ad03f4bc70e4c334d8e52dce2ea2b6f09a9a244 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Thu, 21 Mar 2024 22:11:26 +0800
Subject: [PATCH 2/7] lavc/vp9dsp: R-V V mc bilin h

C908:
vp9_avg_bilin_4h_8bpp_c: 5.5
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.5
vp9_avg_bilin_8h_8bpp_c: 19.7
vp9_avg_bilin_8h_8bpp_rvv_i64: 5.0
vp9_avg_bilin_16h_8bpp_c: 78.2
vp9_avg_bilin_16h_8bpp_rvv_i64: 10.0
vp9_avg_bilin_32h_8bpp_c: 325.2
vp9_avg_bilin_32h_8bpp_rvv_i64: 28.5
vp9_avg_bilin_64h_8bpp_c: 1266.2
vp9_avg_bilin_64h_8bpp_rvv_i64: 115.0
vp9_put_bilin_4h_8bpp_c: 4.5
vp9_put_bilin_4h_8bpp_rvv_i64: 2.2
vp9_put_bilin_8h_8bpp_c: 16.7
vp9_put_bilin_8h_8bpp_rvv_i64: 4.2
vp9_put_bilin_16h_8bpp_c: 65.2
vp9_put_bilin_16h_8bpp_rvv_i64: 8.7
vp9_put_bilin_32h_8bpp_c: 273.5
vp9_put_bilin_32h_8bpp_rvv_i64: 26.7
vp9_put_bilin_64h_8bpp_c: 1041.0
vp9_put_bilin_64h_8bpp_rvv_i64: 87.2
---
 libavcodec/riscv/vp9_mc_rvv.S  | 73 ++++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp_init.c | 17 ++++++++
 2 files changed, 90 insertions(+)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index ba9ec3431f..a97807633e 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -51,6 +51,72 @@ 
         ret
 .endm
 
+/* Filter one row of \len pixels from (a2): \dst = src[x] + ((a5 * (src[x + 1] - src[x]) + t4) >> 4).
+ * Needs t1 = -a5; clobbers t0, v0, v8, v16. For \type "avg" the result is averaged with the row at (a0). */
+.macro bilin_h_load dst len type
+.if \len == 4                                   // vl = \len + 1: every pixel needs its right-hand neighbour
+        vsetivli        zero, 5, e8, mf2, ta, ma
+.elseif \len == 8
+        vsetivli        zero, 9, e8, m1, ta, ma
+.elseif \len == 16
+        vsetivli        zero, 17, e8, m2, ta, ma
+.elseif \len == 32
+        li              t0, 33
+        vsetvli         zero, t0, e8, m4, ta, ma
+.elseif \len == 64
+        li              t0, 65
+        vsetvli         zero, t0, e8, m8, ta, ma
+.endif                                          // NOTE(review): these vl/LMUL pairs need VLEN >= 128 - confirm the caller guarantees it
+
+        vle8.v          v8, (a2)                // v8[i] = src[i], i = 0 .. \len
+        vslide1down.vx  v0, v8, t5              // v0[i] = src[i + 1]; t5 only fills the last, unused element
+
+.if \len == 4                                   // vl = \len at half the LMUL: the widening ops below write 2 * LMUL
+        vsetivli        zero, 4, e8, mf4, ta, ma
+.elseif \len == 8
+        vsetivli        zero, 8, e8, mf2, ta, ma
+.elseif \len == 16
+        vsetivli        zero, 16, e8, m1, ta, ma
+.elseif \len == 32
+        li              t0, 32
+        vsetvli         zero, t0, e8, m2, ta, ma
+.elseif \len == 64
+        li              t0, 64
+        vsetvli         zero, t0, e8, m4, ta, ma
+.endif
+
+        vwmulu.vx       v16, v0, a5             // v16 = a5 * src[x + 1], widened to 16 bits
+        vwmaccsu.vx     v16, t1, v8             // v16 += t1 * src[x] (signed scalar, unsigned pixels)
+        vwadd.wx        v16, v16, t4            // add the rounding term supplied in t4
+        vnsra.wi        v16, v16, 4             // arithmetic >> 4, narrowed back to 8 bits
+
+.ifc \type,avg
+        vadd.vv         v16, v16, v8            // filtered pixel = src[x] + correction
+        vle8.v          \dst, (a0)              // row currently stored at the destination
+        vaaddu.vv       \dst, \dst, v16         // averaging add, rounded according to vxrm
+.else
+        vadd.vv         \dst, v16, v8           // filtered pixel = src[x] + correction
+.endif
+.endm
+
+/* Function body: \type ("put" or "avg") horizontal bilinear filter over a4 rows of \len pixels. */
+.macro bilin_h len type
+        neg             t1, a5                  // t1 = -a5, multiplier for the src[x] term
+        li              t5, 1                   // scalar shifted in by vslide1down in bilin_h_load
+        li              t4, 8                   // rounding term added before the >> 4
+.ifc \type,avg
+        csrwi           vxrm, 0                 // fixed-point rounding mode 0 for the vaaddu in bilin_h_load
+.endif
+1:
+        bilin_h_load    v0, \len, \type         // v0 = one finished output row
+        vse8.v          v0, (a0)
+        add             a0, a0, a1              // step the destination pointer by a1
+        add             a2, a2, a3              // step the source pointer by a3
+        addi            a4, a4, -1              // one row done
+        bnez            a4, 1b
+        ret
+.endm
+
 .irp len 64, 32, 16
 func ff_copy\len\()_rvv, zve32x
         copy_avg \len copy
@@ -61,4 +127,11 @@  endfunc
 func ff_avg\len\()_rvv, zve32x
         copy_avg \len avg
 endfunc
+
+func ff_put_bilin_\len\()h_rvv, zve32x          // "put": each filtered row is stored to (a0) as is
+        bilin_h \len put
+endfunc
+func ff_avg_bilin_\len\()h_rvv, zve32x          // "avg": each filtered row is averaged with the row already at (a0)
+        bilin_h \len avg
+endfunc                                         // NOTE(review): this .irp emits only len = 64, 32, 16, yet vp9dsp_init.c also installs 8h/4h - confirm those symbols are defined
 .endr
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index c602c38bb2..d6d6fb52cc 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -106,6 +106,23 @@  static av_cold void vp9dsp_mc_init_rvv(VP9DSPContext *dsp, int bpp)
 #undef init_copy_avg
 #undef init_avg
 #undef init_fpel
+
+/* Hook ff_<type>_bilin_<sz><dir>_rvv into dsp->mc; idx1 0..4 pairs with sizes 64, 32, 16, 8, 4. */
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type)  \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][idxh][idxv] =   \
+        ff_##type##_bilin_##sz##dir##_rvv
+#define init_subpel2(idx, idxh, idxv, dir, type)      \
+    init_subpel1(0, idx, idxh, idxv, 64, dir, type);  \
+    init_subpel1(1, idx, idxh, idxv, 32, dir, type);  \
+    init_subpel1(2, idx, idxh, idxv, 16, dir, type);  \
+    init_subpel1(3, idx, idxh, idxv,  8, dir, type);  \
+    init_subpel1(4, idx, idxh, idxv,  4, dir, type)
+#define init_subpel3(idx, type) init_subpel2(idx, 1, 0, h, type) /* "h" variants only: slot [1][0] */
+    init_subpel3(0, put);
+    init_subpel3(1, avg);
+#undef init_subpel3
+#undef init_subpel2
+#undef init_subpel1
     }
 #endif
 #endif
-- 
2.44.0