diff mbox series

[FFmpeg-devel,2/4] lavc/vp9dsp: R-V V ipred vert

Message ID CAEa-L+tcAyGUmeRC_F8tQ0k5-noR7JYGRdi=rTmgET3LyvR+qw@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/4] lavc/vp9dsp: R-V V ipred dc | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

flow gg March 2, 2024, 7:42 a.m. UTC

Comments

flow gg March 3, 2024, 2:01 a.m. UTC | #1
PATCH 1/4 was updated, so this patch has been updated accordingly.

flow gg <hlefthleft@gmail.com> 于2024年3月2日周六 15:42写道:

>
>
flow gg March 22, 2024, 6:02 a.m. UTC | #2
Because the previous patch was updated, this patch has been updated in this response as well.

flow gg <hlefthleft@gmail.com> 于2024年3月3日周日 10:01写道:

> Due to the PATCH 1/4 update, updates here.
>
> flow gg <hlefthleft@gmail.com> 于2024年3月2日周六 15:42写道:
>
>>
>>
diff mbox series

Patch

From 7abd262daa281cee412a905ea75a5f10dd0b1fbe Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 1 Mar 2024 18:38:43 +0800
Subject: [PATCH 2/4] lavc/vp9dsp: R-V V ipred vert

C908:
vp9_vert_8x8_8bpp_c: 22.0
vp9_vert_8x8_8bpp_rvv_i64: 18.5
vp9_vert_16x16_8bpp_c: 71.2
vp9_vert_16x16_8bpp_rvv_i32: 50.7
vp9_vert_32x32_8bpp_c: 300.2
vp9_vert_32x32_8bpp_rvv_i32: 136.7
---
 libavcodec/riscv/vp9_intra_rvv.S | 35 ++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp.h        |  6 ++++++
 libavcodec/riscv/vp9dsp_init.c   |  3 +++
 3 files changed, 44 insertions(+)

diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
index b3b0470cfc..88b54f37b0 100644
--- a/libavcodec/riscv/vp9_intra_rvv.S
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -199,3 +199,38 @@  endfunc
 func ff_dc_top_8x8_rvv, zve64x
         dc8x8 top
 endfunc
+
+func ff_v_32x32_rvv, zve32x  /* vertical intra pred, 32x32: copy the 32 bytes at a3 into 32 rows; a0 = dst, a1 = stride, a3 = a (per the C prototype) */
+        vsetivli     zero, 8, e8, mf2, ta, ma  /* request vl = 8; NOTE(review): vl is only 8 if VLEN >= 128 - init appears to check vlenb >= 16, confirm */
+        vle32.v      v8, (a3)  /* EEW = 32 with vl = 8: loads 8 x 4 = 32 bytes; EMUL = (32/8) * 1/2 = 2 */
+
+        .rept 31  /* unrolled: rows 0..30 */
+        vse32.v      v8, (a0)  /* write the 32 loaded bytes to the current row */
+        add          a0, a0, a1  /* advance dst by one stride */
+        .endr
+        vse32.v      v8, (a0)  /* row 31: final store, no pointer advance needed */
+
+        ret
+endfunc
+
+func ff_v_16x16_rvv, zve32x  /* vertical intra pred, 16x16: copy the 16 bytes at a3 into 16 rows; a0 = dst, a1 = stride, a3 = a (per the C prototype) */
+        vsetivli     zero, 4, e8, mf4, ta, ma  /* request vl = 4; NOTE(review): vl is only 4 if VLEN >= 128 - init appears to check vlenb >= 16, confirm */
+        vle32.v      v8, (a3)  /* EEW = 32 with vl = 4: loads 4 x 4 = 16 bytes; EMUL = (32/8) * 1/4 = 1 */
+
+        .rept 15  /* unrolled: rows 0..14 */
+        vse32.v      v8, (a0)  /* write the 16 loaded bytes to the current row */
+        add          a0, a0, a1  /* advance dst by one stride */
+        .endr
+        vse32.v      v8, (a0)  /* row 15: final store, no pointer advance needed */
+
+        ret
+endfunc
+
+func ff_v_8x8_rvv, zve64x  /* vertical intra pred, 8x8: replicate the 8 bytes at a3 into 8 rows; a0 = dst, a1 = stride, a3 = a (per the C prototype) */
+        ld           t0, (a3)  /* scalar load of the 8-byte row; NOTE(review): ld is an RV64 instruction - confirm the init path is limited to 64-bit targets */
+        vsetivli     zero, 8, e64, m4, ta, ma  /* request 8 x 64-bit elements; NOTE(review): needs VLEN >= 128 for vl = 8 - guard not visible in this hunk */
+        vmv.v.x      v8, t0  /* broadcast the row to every element */
+        vsse64.v     v8, (a0), a1  /* strided store: element i goes to a0 + i * a1, i.e. one 8-byte row per element */
+
+        ret
+endfunc
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
index abd57bd836..ae4fb266d0 100644
--- a/libavcodec/riscv/vp9dsp.h
+++ b/libavcodec/riscv/vp9dsp.h
@@ -60,5 +60,11 @@  void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                          const uint8_t *a);
 void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                        const uint8_t *a);
+void ff_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);  // installed as intra_pred[TX_32X32][VERT_PRED]
+void ff_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);  // installed as intra_pred[TX_16X16][VERT_PRED]
+void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                  const uint8_t *a);  // installed as intra_pred[TX_8X8][VERT_PRED]
 
 #endif  // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 69ab39004c..9c550d40b5 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -36,6 +36,7 @@  static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
             dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
             dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
             dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+            dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvv;
         }
 
         if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
@@ -51,6 +52,8 @@  static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
             dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
             dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
             dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+            dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv;
+            dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv;
         }
     #endif
 }
-- 
2.44.0