diff mbox series

[FFmpeg-devel,3/3] lavc/h264pred: R-V V pred16x16_dc_8

Message ID CAEa-L+t=Nc=9VngoXV_h4qq52wyWSJQ47c=YEU-Gomjyn77fqA@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/3] lavc/h264pred: R-V V pred16x16_vertical_8 | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

flow gg Jan. 16, 2024, 4:16 p.m. UTC

diff mbox series

Patch

From 8c5fdbfea42e9ad6ba6e1df5e4ea3c583d59537a Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Tue, 16 Jan 2024 23:57:53 +0800
Subject: [PATCH 3/3] lavc/h264pred: R-V V pred16x16_dc_8

C908
pred16x16_dc_8_c: 2.5
pred16x16_dc_8_rvv_i32: 1.7
---
 libavcodec/riscv/h264pred_init.c |  2 ++
 libavcodec/riscv/h264pred_rvv.S  | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/libavcodec/riscv/h264pred_init.c b/libavcodec/riscv/h264pred_init.c
index 8665bc729e..e8d5b7dd8f 100644
--- a/libavcodec/riscv/h264pred_init.c
+++ b/libavcodec/riscv/h264pred_init.c
@@ -26,6 +26,7 @@ 
 
 void ff_pred16x16_vertical_8_rvv(uint8_t *src, ptrdiff_t stride);
 void ff_pred16x16_horizontal_8_rvv(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_8_rvv(uint8_t *src, ptrdiff_t stride);
 
 av_cold void ff_h264_pred_init_riscv(H264PredContext *h, int codec_id,
                                    const int bit_depth,
@@ -38,6 +39,7 @@  av_cold void ff_h264_pred_init_riscv(H264PredContext *h, int codec_id,
             if (flags & AV_CPU_FLAG_RVV_I32) {
                 h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_rvv;
                 h->pred16x16[HOR_PRED8x8] = ff_pred16x16_horizontal_8_rvv;
+                h->pred16x16[DC_PRED8x8] = ff_pred16x16_dc_8_rvv;
             }
         #endif
     }
diff --git a/libavcodec/riscv/h264pred_rvv.S b/libavcodec/riscv/h264pred_rvv.S
index ba1e9045e1..1492991ef4 100644
--- a/libavcodec/riscv/h264pred_rvv.S
+++ b/libavcodec/riscv/h264pred_rvv.S
@@ -48,3 +48,31 @@  func ff_pred16x16_horizontal_8_rvv, zve32x
 
         ret
 endfunc
+
+func ff_pred16x16_dc_8_rvv, zve32x  // a0 = src, a1 = stride; writes 16 rows of 16 bytes, all set to the rounded mean of the 32 neighbour bytes
+        vsetivli     zero, 1, e16, m1, ta, ma  // one 16-bit element
+        vmv.v.x      v16, zero  // v16[0] = 0: 16-bit accumulator for the widening sums below
+
+        vsetivli     zero, 16, e8, m1, ta, ma  // request 16 x 8-bit elements; NOTE(review): vl = 16 needs VLEN >= 128 - confirm the init code checks this
+        sub          t2, a0, a1  // t2 = src - stride
+        vle8.v       v8, (t2)  // v8 = 16 contiguous bytes of the row above the block
+        vwredsumu.vs v16, v8, v16  // v16[0] = v16[0] + sum(v8), unsigned, widened to 16 bits
+        addi         t2, a0, -1  // t2 = src - 1
+        vlse8.v      v8, (t2), a1  // v8[i] = src[i * stride - 1]: the column left of the block
+        vwredsumu.vs v16, v8, v16  // v16[0] += sum(v8): now the sum of all 32 loaded bytes
+        vsetivli     zero, 1, e16, m1, ta, ma  // back to one 16-bit element to read the accumulator
+        vmv.x.s      t1, v16  // t1 = 16-bit sum
+        addi         t1, t1, 16  // rounding bias: half of the divisor 32
+        srai         t1, t1, 5  // t1 = (sum + 16) >> 5
+        vsetivli     zero, 16, e8, m1, ta, ma  // 16 x 8-bit elements again for the broadcast
+        vmv.v.x      v0, t1  // every byte of v0[0..15] = low 8 bits of t1
+        vsetivli     zero, 4, e8, mf4, ta, ma  // vl = 4 with SEW=8, LMUL=1/4 so that vse32.v gets EMUL = 32/8 * 1/4 = 1
+        li           t0, 16  // t0 = rows left to write
+1:
+        vse32.v      v0, (a0)  // store 4 x 32-bit words = one 16-byte row
+        addi         t0, t0, -1  // one row done
+        add          a0, a0, a1  // advance to the next row
+        bnez         t0, 1b  // loop until all 16 rows are written
+
+        ret
+endfunc
-- 
2.43.0