[FFmpeg-devel,v2,11/14] aarch64/vvc: Add put_epel_h

Message ID	tencent_981C554B8EA61B10AE26D327EEEA9D532206@qq.com
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Message-ID: <tencent_981C554B8EA61B10AE26D327EEEA9D532206@qq.com> From: Zhao Zhili <quinkblack@foxmail.com> To: ffmpeg-devel@ffmpeg.org Date: Thu, 12 Sep 2024 02:06:15 +0800 In-Reply-To: <20240911180618.28921-1-quinkblack@foxmail.com> References: <20240911180618.28921-1-quinkblack@foxmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v2 11/14] aarch64/vvc: Add put_epel_h Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Zhao Zhili <zhilizhao@tencent.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	aarch64/vvc: Add SIMD \| expand [FFmpeg-devel,v2,00/14] aarch64/vvc: Add SIMD [FFmpeg-devel,v2,02/14] aarch64/hevc: Move epel/qpel to h26x directory [FFmpeg-devel,v2,03/14] aarch64/vvc: Add put_qpel_h_* and put_qpel_uni_h_* [FFmpeg-devel,v2,04/14] aarch64/vvc: Add put_pel/put_pel_uni/put_pel_uni_w [FFmpeg-devel,v2,05/14] aarch64/vvc: Add put_qpel_hx i8mm [FFmpeg-devel,v2,06/14] avcodec/hevc: ff_hevc_(qpel/epel)_filters are signed type [FFmpeg-devel,v2,07/14] aarch64/h26x: Remove duplicate b.eq instruction [FFmpeg-devel,v2,08/14] aarch64/vvc: Add put_qpel_vx [FFmpeg-devel,v2,09/14] aarch64/vvc: Add put_qpel_hv [FFmpeg-devel,v2,10/14] aarch64/vvc: Add sad [FFmpeg-devel,v2,11/14] aarch64/vvc: Add put_epel_h [FFmpeg-devel,v2,12/14] aarch64/vvc: Add put_epel_h i8mm [FFmpeg-devel,v2,13/14] aarch64/vvc: Add put_epel_hv [FFmpeg-devel,v2,14/14] aarch64/vvc: Add avg

Message ID

tencent_981C554B8EA61B10AE26D327EEEA9D532206@qq.com

State

New

Headers

Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org
 designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100;
Message-ID: <tencent_981C554B8EA61B10AE26D327EEEA9D532206@qq.com>
From: Zhao Zhili <quinkblack@foxmail.com>
To: ffmpeg-devel@ffmpeg.org
Date: Thu, 12 Sep 2024 02:06:15 +0800
In-Reply-To: <20240911180618.28921-1-quinkblack@foxmail.com>
References: <20240911180618.28921-1-quinkblack@foxmail.com>
MIME-Version: 1.0
Subject: [FFmpeg-devel] [PATCH v2 11/14] aarch64/vvc: Add put_epel_h
Precedence: list
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: Zhao Zhili <zhilizhao@tencent.com>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Series

aarch64/vvc: Add SIMD | expand

Commit Message

Zhao Zhili Sept. 11, 2024, 6:06 p.m. UTC

From: Zhao Zhili <zhilizhao@tencent.com>

put_chroma_h_8_4x4_c:                                    0.2 ( 1.00x)
put_chroma_h_8_4x4_neon:                                 0.2 ( 1.00x)
put_chroma_h_8_8x8_c:                                    0.8 ( 1.00x)
put_chroma_h_8_8x8_neon:                                 0.2 ( 3.00x)
put_chroma_h_8_16x16_c:                                  3.8 ( 1.00x)
put_chroma_h_8_16x16_neon:                               0.8 ( 5.00x)
put_chroma_h_8_32x32_c:                                 12.5 ( 1.00x)
put_chroma_h_8_32x32_neon:                               2.2 ( 5.56x)
put_chroma_h_8_64x64_c:                                 47.0 ( 1.00x)
put_chroma_h_8_64x64_neon:                               8.8 ( 5.37x)
put_chroma_h_8_128x128_c:                              200.2 ( 1.00x)
put_chroma_h_8_128x128_neon:                            31.8 ( 6.31x)
---
 libavcodec/aarch64/h26x/dsp.h       |  3 +++
 libavcodec/aarch64/h26x/epel_neon.S | 30 +++++++++++++++++++++++++++++
 libavcodec/aarch64/vvc/dsp_init.c   |  7 +++++++
 3 files changed, 40 insertions(+)

diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index c54906dde2..6978b900fe 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -248,6 +248,9 @@  NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _src
 NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
         ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),)
 
+NEON8_FNPROTO_PARTIAL_4(epel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
+        const int8_t *hf, const int8_t *vf, int width),)
+
 #undef NEON8_FNPROTO_PARTIAL_6
 #define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
     void ff_vvc_put_##fn##4_8_neon##ext args; \
diff --git a/libavcodec/aarch64/h26x/epel_neon.S b/libavcodec/aarch64/h26x/epel_neon.S
index 8ca42a5c3a..80a0b66a52 100644
--- a/libavcodec/aarch64/h26x/epel_neon.S
+++ b/libavcodec/aarch64/h26x/epel_neon.S
@@ -1375,6 +1375,18 @@  endfunc
         mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 .endm
 
+.macro VVC_EPEL_H_HEADER
+        ld1r            {v30.4s}, [x4]              // x4 = hf (5th argument, const int8_t *): load 4 bytes of filter taps and replicate them into every 32-bit lane of v30
+        sub             x1, x1, #1                  // move the source pointer (x1) back by one byte; presumably so the filter window starts one sample to the left -- confirm
+        mov             x10, #(VVC_MAX_PB_SIZE * 2) // x10 = VVC_MAX_PB_SIZE * 2; presumably the dst row stride in bytes for int16_t output -- confirm against the loop that consumes x10
+.endm
+
+function ff_vvc_put_epel_h4_8_neon, export=1
+        VVC_EPEL_H_HEADER
+        sxtl            v0.8h,   v30.8b             // sign-extend the int8 filter taps held in the low half of v30 into 16-bit lanes of v0
+        b               1f                          // no body of its own: continue at the next local label 1, expected to be the loop of the HEVC h4 function that follows (label not visible in this hunk -- confirm)
+endfunc
+
 function ff_hevc_put_hevc_epel_h4_8_neon, export=1
         EPEL_H_HEADER
         sxtl            v0.8h,   v30.8b
@@ -1414,6 +1426,12 @@  function ff_hevc_put_hevc_epel_h6_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_epel_h8_8_neon, export=1
+        VVC_EPEL_H_HEADER
+        sxtl            v0.8h,   v30.8b             // sign-extend the int8 filter taps held in the low half of v30 into 16-bit lanes of v0
+        b               1f                          // no body of its own: continue at the next local label 1, expected to be the loop of the HEVC h8 function that follows (label not visible in this hunk -- confirm)
+endfunc
+
 function ff_hevc_put_hevc_epel_h8_8_neon, export=1
         EPEL_H_HEADER
         sxtl            v0.8h,   v30.8b
@@ -1461,6 +1479,12 @@  function ff_hevc_put_hevc_epel_h12_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_epel_h16_8_neon, export=1
+        VVC_EPEL_H_HEADER
+        sxtl            v0.8h,   v30.8b             // sign-extend the int8 filter taps held in the low half of v30 into 16-bit lanes of v0
+        b               1f                          // no body of its own: continue at the next local label 1, expected to be the loop of the HEVC h16 function that follows (label not visible in this hunk -- confirm)
+endfunc
+
 function ff_hevc_put_hevc_epel_h16_8_neon, export=1
         EPEL_H_HEADER
         sxtl            v0.8h,   v30.8b
@@ -1523,8 +1547,14 @@  function ff_hevc_put_hevc_epel_h24_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_epel_h32_8_neon, export=1
+        VVC_EPEL_H_HEADER
+        b               0f                          // jump to local label 0 placed right after EPEL_H_HEADER in ff_hevc_put_hevc_epel_h32_8_neon, so the HEVC header is skipped and the rest of that function is shared
+endfunc
+
 function ff_hevc_put_hevc_epel_h32_8_neon, export=1
         EPEL_H_HEADER
+0:
         ld1             {v1.8b}, [x1], #8
         sub             x2,  x2,  w6, uxtw    // decrement src stride
         mov             w7,  w6               // original width
diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c
index 714d642634..c8c13eb068 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -77,6 +77,13 @@  void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
         c->inter.put[0][5][1][1] = ff_vvc_put_qpel_hv64_8_neon;
         c->inter.put[0][6][1][1] = ff_vvc_put_qpel_hv128_8_neon;
 
+        c->inter.put[1][1][0][1] = ff_vvc_put_epel_h4_8_neon;
+        c->inter.put[1][2][0][1] = ff_vvc_put_epel_h8_8_neon;
+        c->inter.put[1][3][0][1] = ff_vvc_put_epel_h16_8_neon;
+        c->inter.put[1][4][0][1] =
+        c->inter.put[1][5][0][1] =
+        c->inter.put[1][6][0][1] = ff_vvc_put_epel_h32_8_neon;
+
         c->inter.put_uni[0][1][0][0] = ff_vvc_put_pel_uni_pixels4_8_neon;
         c->inter.put_uni[0][2][0][0] = ff_vvc_put_pel_uni_pixels8_8_neon;
         c->inter.put_uni[0][3][0][0] = ff_vvc_put_pel_uni_pixels16_8_neon;

[FFmpeg-devel,v2,11/14] aarch64/vvc: Add put_epel_h

Commit Message

Patch