diff mbox series

[FFmpeg-devel,2/3] lavc/vp9dsp: R-V V ipred hor

Message ID CAEa-L+sRgtyNOAZw-fHyMK-THSoaG5X7jNU10No9MkJTgBZKnw@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/3] lavc/vp9dsp: R-V V ipred vert | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

flow gg Feb. 26, 2024, 4:19 p.m. UTC

Comments

flow gg March 2, 2024, 7:41 a.m. UTC | #1
Please ignore this patch; it has been superseded by
"[FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc".

flow gg <hlefthleft@gmail.com> 于2024年2月27日周二 00:19写道:

>
>
diff mbox series

Patch

From e791fada3a4777fae87dec806c0b46b595d265db Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Tue, 27 Feb 2024 00:06:25 +0800
Subject: [PATCH 2/3] lavc/vp9dsp: R-V V ipred hor

C908:
vp9_hor_4x4_8bpp_c: 37.7
vp9_hor_4x4_8bpp_rvv_i32: 33.7
vp9_hor_8x8_8bpp_c: 82.7
vp9_hor_8x8_8bpp_rvv_i32: 51.5
vp9_hor_16x16_8bpp_c: 182.2
vp9_hor_16x16_8bpp_rvv_i32: 89.5
vp9_hor_32x32_8bpp_c: 518.2
vp9_hor_32x32_8bpp_rvv_i32: 270.7
---
 libavcodec/riscv/vp9dsp_init.c |  8 ++++
 libavcodec/riscv/vp9dsp_rvv.S  | 82 ++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 58db936f31..5b68302235 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -25,6 +25,10 @@ 
 
 void ff_vp9_ipred_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
 void ff_vp9_ipred_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
+void ff_vp9_ipred_h_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
+void ff_vp9_ipred_h_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
+void ff_vp9_ipred_h_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
+void ff_vp9_ipred_h_4x4_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
 
 av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
 {
@@ -35,6 +39,10 @@  av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
             if (bpp == 8) {
                 dsp->intra_pred[TX_32X32][VERT_PRED]    = ff_vp9_ipred_v_32x32_rvv;
                 dsp->intra_pred[TX_16X16][VERT_PRED]    = ff_vp9_ipred_v_16x16_rvv;
+                dsp->intra_pred[TX_32X32][HOR_PRED]     = ff_vp9_ipred_h_32x32_rvv;
+                dsp->intra_pred[TX_16X16][HOR_PRED]     = ff_vp9_ipred_h_16x16_rvv;
+                dsp->intra_pred[TX_8X8][HOR_PRED]       = ff_vp9_ipred_h_8x8_rvv;
+                dsp->intra_pred[TX_4X4][HOR_PRED]       = ff_vp9_ipred_h_4x4_rvv;
             }
         }
     #endif
diff --git a/libavcodec/riscv/vp9dsp_rvv.S b/libavcodec/riscv/vp9dsp_rvv.S
index 0645567f1b..578fbce061 100644
--- a/libavcodec/riscv/vp9dsp_rvv.S
+++ b/libavcodec/riscv/vp9dsp_rvv.S
@@ -45,3 +45,85 @@  func ff_vp9_ipred_v_16x16_rvv, zve32x
 
         ret
 endfunc
+
+func ff_vp9_ipred_h_32x32_rvv, zve32x // fill 32 rows of 32 bytes, each row with one byte read from (a2); per the C prototype a0=dst, a1=stride, a2=l (a3 is not used)
+        addi      a2, a2, 31           // a2 -> last of the 32 source bytes; they are consumed back to front
+        li        t0, 32               // requested vector length for the splats: 32 bytes per row
+
+        .rept 2                        // two passes, each producing 16 rows
+        vsetvli   zero, t0, e8, m2, ta, ma // 8-bit elements in 2-register groups; NOTE(review): vl only reaches 32 if VLEN >= 128 - confirm the init code guarantees it
+        .irp n 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30 // one even-numbered register group per row
+        lbu       t1, (a2)             // fetch the byte for this row
+        addi      a2, a2, -1           // step back to the byte for the next row
+        vmv.v.x   v\n, t1              // broadcast the byte to every element of the group
+        .endr
+
+        vsetivli  zero, 8, e8, mf2, ta, ma // vtype for the stores: with SEW=8, LMUL=1/2 an EEW=32 access has EMUL=2, and vl=8 words covers 32 bytes
+        .irp n 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30 // same register groups, in the order they were filled
+        vse32.v   v\n, (a0)            // write one row as 8 32-bit words; NOTE(review): may need 4-byte aligned a0 on some cores - confirm
+        add       a0, a0, a1           // advance a0 by a1 to the next row (also executed after the final row, result unused)
+        .endr
+        .endr
+
+        ret
+endfunc
+
+func ff_vp9_ipred_h_16x16_rvv, zve32x // fill 16 rows of 16 bytes, each row with one byte read from (a2); per the C prototype a0=dst, a1=stride, a2=l (a3 is not used)
+        addi      a2, a2, 15           // a2 -> last of the 16 source bytes; they are consumed back to front
+        vsetivli  zero, 16, e8, m1, ta, ma // 16 8-bit elements per register; NOTE(review): vl only reaches 16 if VLEN >= 128 - confirm
+
+        .irp n 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 // one register per row, v8..v23
+        lbu       t1, (a2)             // fetch the byte for this row
+        addi      a2, a2, -1           // step back to the byte for the next row
+        vmv.v.x   v\n, t1              // broadcast the byte to every element of the register
+        .endr
+
+        vsetivli  zero, 4, e8, mf4, ta, ma // vtype for the stores: with SEW=8, LMUL=1/4 an EEW=32 access has EMUL=1, and vl=4 words covers 16 bytes
+        .irp n 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22 // first 15 rows; the last row is handled separately below
+        vse32.v   v\n, (a0)            // write one row as 4 32-bit words; NOTE(review): may need 4-byte aligned a0 on some cores - confirm
+        add       a0, a0, a1           // advance a0 by a1 to the next row
+        .endr
+        vse32.v   v23, (a0)            // final row, stored without a trailing pointer update
+
+        ret
+endfunc
+
+func ff_vp9_ipred_h_8x8_rvv, zve32x // fill 8 rows of 8 bytes, each row with one byte read from (a2); per the C prototype a0=dst, a1=stride, a2=l (a3 is not used)
+        addi      a2, a2, 7            // a2 -> last of the 8 source bytes; they are consumed back to front
+        vsetivli  zero, 8, e8, mf2, ta, ma // 8 8-bit elements in half a register; NOTE(review): vl only reaches 8 if VLEN >= 128 - confirm
+
+        .irp n 8,9,10,11,12,13,14,15   // one register per row, v8..v15
+        lbu       t1, (a2)             // fetch the byte for this row
+        addi      a2, a2, -1           // step back to the byte for the next row
+        vmv.v.x   v\n, t1              // broadcast the byte to the 8 active elements
+        .endr
+
+        vsetivli  zero, 2, e8, mf4, ta, ma // vtype for the stores: with SEW=8, LMUL=1/4 an EEW=32 access has EMUL=1, and vl=2 words covers 8 bytes
+        .irp n 8,9,10,11,12,13,14      // first 7 rows; the last row is handled separately below
+        vse32.v   v\n, (a0)            // write one row as 2 32-bit words; NOTE(review): may need 4-byte aligned a0 on some cores - confirm
+        add       a0, a0, a1           // advance a0 by a1 to the next row
+        .endr
+        vse32.v   v15, (a0)            // final row, stored without a trailing pointer update
+
+        ret
+endfunc
+
+func ff_vp9_ipred_h_4x4_rvv, zve32x // fill 4 rows of 4 bytes, each row with one byte read from (a2); per the C prototype a0=dst, a1=stride, a2=l (a3 is not used)
+        addi      a2, a2, 3            // a2 -> last of the 4 source bytes; they are consumed back to front
+        vsetivli  zero, 4, e8, mf2, ta, ma // 4 8-bit elements are enough for one row
+
+        .irp n 8,9,10,11               // one register per row, v8..v11
+        lbu       t1, (a2)             // fetch the byte for this row
+        addi      a2, a2, -1           // step back to the byte for the next row
+        vmv.v.x   v\n, t1              // broadcast the byte to the 4 active elements
+        .endr
+
+        vsetivli  zero, 1, e8, mf4, ta, ma // vtype for the stores: with SEW=8, LMUL=1/4 an EEW=32 access has EMUL=1, and vl=1 word covers 4 bytes
+        .irp n 8,9,10                  // first 3 rows; the last row is handled separately below
+        vse32.v   v\n, (a0)            // write one row as a single 32-bit word; NOTE(review): may need 4-byte aligned a0 on some cores - confirm
+        add       a0, a0, a1           // advance a0 by a1 to the next row
+        .endr
+        vse32.v   v11, (a0)            // final row, stored without a trailing pointer update
+
+        ret
+endfunc
-- 
2.44.0