[FFmpeg-devel,2/3] lavc/vp8dsp: R-V V put_epel v

Message ID	CAEa-L+ucAaY=sPBkoDQBMfzcys=ny7SBuYvw2tEeD-FVwRBbog@mail.gmail.com
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; MIME-Version: 1.0 From: flow gg <hlefthleft@gmail.com> Date: Fri, 22 Mar 2024 14:01:21 +0800 Message-ID: <CAEa-L+ucAaY=sPBkoDQBMfzcys=ny7SBuYvw2tEeD-FVwRBbog@mail.gmail.com> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Content-Type: multipart/mixed; boundary="0000000000004a4c1b0614398eb3" Subject: [FFmpeg-devel] [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,1/3] lavc/vp8dsp: R-V V put_epel h \| expand [FFmpeg-devel,1/3] lavc/vp8dsp: R-V V put_epel h [FFmpeg-devel,2/3] lavc/vp8dsp: R-V V put_epel v [FFmpeg-devel,3/3] lavc/vp8dsp: R-V V put_epel hv

Message ID

CAEa-L+ucAaY=sPBkoDQBMfzcys=ny7SBuYvw2tEeD-FVwRBbog@mail.gmail.com

State

New

Headers

Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org
 designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100;
MIME-Version: 1.0
From: flow gg <hlefthleft@gmail.com>
Date: Fri, 22 Mar 2024 14:01:21 +0800
Message-ID: 
 <CAEa-L+ucAaY=sPBkoDQBMfzcys=ny7SBuYvw2tEeD-FVwRBbog@mail.gmail.com>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Content-Type: multipart/mixed; boundary="0000000000004a4c1b0614398eb3"
Subject: [FFmpeg-devel] [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v
Precedence: list
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Series

[FFmpeg-devel,1/3] lavc/vp8dsp: R-V V put_epel h | expand

Context	Check	Description
andriy/configure_x86	warning	Failed to apply patch
yinshiyou/configure_loongarch64	warning	Failed to apply patch

Context

Check

Description

andriy/configure_x86

warning

Failed to apply patch

yinshiyou/configure_loongarch64

warning

Failed to apply patch

Comments

Rémi Denis-Courmont March 27, 2024, 3:36 p.m. UTC | #1

Le perjantaina 22. maaliskuuta 2024, 8.01.21 EET flow gg a écrit :
> 

IMO, you could just as well share the code and avoid most if's. Not like one 
additional `li a3, 1` per function call is going to matter in the grand scheme 
of things. It might even help by reducing I-cache pressure.

flow gg March 28, 2024, 2:16 a.m. UTC | #2

Okay, changed in the reply and on GitHub. (Another reason I did not do this
initially was that I thought there weren't enough registers available and
that other changes, which could cause side effects, would be needed. It turns
out that the registers are sufficient for vp8; it's only vp9 that doesn't
have enough.)

Rémi Denis-Courmont <remi@remlab.net> 于2024年3月27日周三 23:36写道：

> Le perjantaina 22. maaliskuuta 2024, 8.01.21 EET flow gg a écrit :
> >
>
> IMO, you could just as well share the code and avoid most if's. Not like
> one
> additional `li a3, 1` per function call is going to matter in the grand
> scheme
> of things. It might even help by reducing I-cache pressure.
>
> --
> 雷米‧德尼-库尔蒙
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

From a59509c554a319f8271ad4175da40788445f7a56 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Thu, 21 Mar 2024 17:49:54 +0800
Subject: [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v

C908:
vp8_put_epel4_v4_c: 11.0
vp8_put_epel4_v4_rvv_i32: 5.0
vp8_put_epel4_v6_c: 16.5
vp8_put_epel4_v6_rvv_i32: 6.2
vp8_put_epel8_v4_c: 43.7
vp8_put_epel8_v4_rvv_i32: 11.2
vp8_put_epel8_v6_c: 68.7
vp8_put_epel8_v6_rvv_i32: 13.2
vp8_put_epel16_v4_c: 92.5
vp8_put_epel16_v4_rvv_i32: 13.7
vp8_put_epel16_v6_c: 135.7
vp8_put_epel16_v6_rvv_i32: 16.5
---
 libavcodec/riscv/vp8dsp_init.c |  7 ++++++
 libavcodec/riscv/vp8dsp_rvv.S  | 44 +++++++++++++++++++++++++++-------
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index 6614d661f7..2f123b67fe 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -85,6 +85,13 @@  av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
         c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv;
         c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv;
         c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv;
+
+        c->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_rvv;
+        c->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_rvv;
+        c->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_rvv;
+        c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
+        c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
+        c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index a0dd46e3a8..134154acfc 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -233,9 +233,13 @@  subpel_filters:
         .byte 1,  -8,  36, 108, -11, 2
         .byte 0,  -1,  12, 123,  -6, 0
 
-.macro epel_filter size
+.macro epel_filter size type
         lla             t2, subpel_filters
+.ifc \type,v                           // v variant: filter index is taken from a6
+        addi            t0, a6, -1
+.else                                  // any other type (h): filter index is taken from a5
         addi            t0, a5, -1
+.endif
         li              t1, 6
         mul             t0, t0, t1
         add             t0, t0, t2
@@ -248,19 +252,33 @@  subpel_filters:
 .endif
 .endm
 
-.macro epel_load dst len size
+.macro epel_load dst len size type
+.ifc \type,v                           // v variant: neighbours are at a2 -/+ a3
+        sub             t6, a2, a3
+        add             a7, a2, a3
+.else                                  // any other type (h): neighbours are at a2 -/+ 1 byte
         addi            t6, a2, -1
         addi            a7, a2, 1
+.endif
         vle8.v          v24, (a2)
         vle8.v          v22, (t6)
         vle8.v          v26, (a7)
+.ifc \type,v                           // step a7 one more position forward (by a3 or by 1)
+        add             a7, a7, a3
+.else
         addi            a7, a7, 1
+.endif
         vle8.v          v28, (a7)
         vwmulu.vx       v16, v24, t2
         vwmulu.vx       v20, v26, t3
 .ifc \size,6
+.ifc \type,v                           // 6-tap only: move t6 back and a7 forward one more step
+        sub             t6, t6, a3
+        add             a7, a7, a3
+.else
         addi            t6, t6, -1
         addi            a7, a7, 1
+.endif
         vle8.v          v24, (t6)
         vle8.v          v26, (a7)
         vwmaccu.vx      v16, t0, v24
@@ -292,13 +310,13 @@  subpel_filters:
         vnclipu.wi      \dst, v24, 0
 .endm
 
-.macro epel_load_inc dst len size
-        epel_load       \dst \len \size
+.macro epel_load_inc dst len size type
+        epel_load       \dst \len \size \type
         add             a2, a2, a3
 .endm
 
-.macro epel len size
-        epel_filter     \size
+.macro epel len size type
+        epel_filter     \size \type
 
 .ifc \len,4
         vsetivli        zero, 4, e8, mf4, ta, ma
@@ -310,7 +328,7 @@  subpel_filters:
 
 1:
         addi            a4, a4, -1
-        epel_load_inc   v30 \len \size
+        epel_load_inc   v30 \len \size \type
         vse8.v          v30, (a0)
         add             a0, a0, a1
         bnez            a4, 1b
@@ -320,10 +338,18 @@  subpel_filters:
 
 .irp len 16,8,4
 func ff_put_vp8_epel\len\()_h6_rvv, zve32x
-        epel \len 6
+        epel \len 6 h
 endfunc
 
 func ff_put_vp8_epel\len\()_h4_rvv, zve32x
-        epel \len 4
+        epel \len 4 h
+endfunc
+
+func ff_put_vp8_epel\len\()_v6_rvv, zve32x
+        epel \len 6 v
+endfunc
+
+func ff_put_vp8_epel\len\()_v4_rvv, zve32x
+        epel \len 4 v
 endfunc
 .endr
-- 
2.44.0

[FFmpeg-devel,2/3] lavc/vp8dsp: R-V V put_epel v

Checks

Commit Message

Comments

Patch