diff mbox series

[FFmpeg-devel,2/3] lavc/vp8dsp: R-V V vp8_idct_dc_add4y

Message ID CAEa-L+tx-_eDag6ney7ZyhLCLe2zn_otJvQZ5dZoWzELDWdVrw@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/3] lavc/vp8dsp: R-V V vp8_idct_dc_add | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

flow gg Feb. 2, 2024, 5:03 a.m. UTC

Comments

Rémi Denis-Courmont Feb. 12, 2024, 7:54 p.m. UTC | #1
Hi,

To avoid repeating the code, you can either use .rept or .irp. You can even 
use assembler conditionals to elide the redundant code on the last iteration.
flow gg Feb. 13, 2024, 2:34 a.m. UTC | #2
Okay, updated it in the reply

Rémi Denis-Courmont <remi@remlab.net> 于2024年2月13日周二 03:54写道:

> Hi,
>
> To avoid repeating the code, you can either use .rept or .irp. You can
> even
> use assembler conditionals to elide the redundant code on the last
> iteration.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

From 1388287e7fb00cae1709bc3b82bc373fff4cd0a8 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 2 Feb 2024 12:50:07 +0800
Subject: [PATCH 2/3] lavc/vp8dsp: R-V V vp8_idct_dc_add4y

c908:
vp8_idct_dc_add4y_c: 368.5
vp8_idct_dc_add4y_rvv_i32: 134.5
---
 libavcodec/riscv/vp8dsp_init.c |  2 ++
 libavcodec/riscv/vp8dsp_rvv.S  | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index ab020070eb..6615d3d440 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -26,6 +26,7 @@ 
 #include "libavcodec/vp8dsp.h"
 
 void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
 
 av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
 {
@@ -34,6 +35,7 @@  av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
 
     if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
         c->vp8_idct_dc_add = ff_vp8_idct_dc_add_rvv;
+        c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 8609b79937..87c369fb16 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -36,9 +36,25 @@ 
         vsse32.v      v0, (a0), a2
 .endm
 
+.macro vp8_idct_dc_addy
+        vp8_idct_dc_add
+        addi          a0, a0, 4
+        addi          a1, a1, 32
+.endm
+
 func ff_vp8_idct_dc_add_rvv, zve32x
         vsetivli      zero, 4, e8, mf4, ta, ma
         vp8_idct_dc_add
 
         ret
 endfunc
+
+func ff_vp8_idct_dc_add4y_rvv, zve32x
+        vsetivli      zero, 4, e8, mf4, ta, ma
+        vp8_idct_dc_addy
+        vp8_idct_dc_addy
+        vp8_idct_dc_addy
+        vp8_idct_dc_add
+
+        ret
+endfunc
-- 
2.43.0