diff mbox series

[FFmpeg-devel,2/2] aarch64/vvc: Bind h26x/sao filter implementation to vvc

Message ID tencent_998F96544D23A05CCD8DCFC4554227163807@qq.com
State New
Headers show
Series [FFmpeg-devel,1/2] aarch64/hevc: Move sao to h26x directory | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Zhao Zhili Aug. 28, 2024, 3:21 p.m. UTC
From: Zhao Zhili <zhilizhao@tencent.com>

---
 libavcodec/aarch64/h26x/dsp.h             |  6 +++-
 libavcodec/aarch64/h26x/sao_neon.S        | 44 +++++++++++++++++------
 libavcodec/aarch64/hevcdsp_init_aarch64.c |  2 +-
 libavcodec/aarch64/vvc/Makefile           |  5 +--
 libavcodec/aarch64/vvc/dsp_init.c         |  6 ++++
 5 files changed, 48 insertions(+), 15 deletions(-)

Comments

Martin Storsjö Aug. 29, 2024, 11:25 a.m. UTC | #1
On Wed, 28 Aug 2024, Zhao Zhili wrote:

> From: Zhao Zhili <zhilizhao@tencent.com>
>
> ---
> libavcodec/aarch64/h26x/dsp.h             |  6 +++-
> libavcodec/aarch64/h26x/sao_neon.S        | 44 +++++++++++++++++------
> libavcodec/aarch64/hevcdsp_init_aarch64.c |  2 +-
> libavcodec/aarch64/vvc/Makefile           |  5 +--
> libavcodec/aarch64/vvc/dsp_init.c         |  6 ++++
> 5 files changed, 48 insertions(+), 15 deletions(-)

These two patches look reasonable to me.

// Martin
Nuo Mi Aug. 31, 2024, 8:09 a.m. UTC | #2
On Thu, Aug 29, 2024 at 7:35 PM Martin Storsjö <martin@martin.st> wrote:

> On Wed, 28 Aug 2024, Zhao Zhili wrote:
>
> > From: Zhao Zhili <zhilizhao@tencent.com>
> >
> > ---
> > libavcodec/aarch64/h26x/dsp.h             |  6 +++-
> > libavcodec/aarch64/h26x/sao_neon.S        | 44 +++++++++++++++++------
> > libavcodec/aarch64/hevcdsp_init_aarch64.c |  2 +-
> > libavcodec/aarch64/vvc/Makefile           |  5 +--
> > libavcodec/aarch64/vvc/dsp_init.c         |  6 ++++
> > 5 files changed, 48 insertions(+), 15 deletions(-)
>
> These two patches look reasonable to me.
>
> // Martin
>
Thank you, Zhili and Martin.
Applied.

>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index 4dcaf0e6bb..d3f7a4dfe3 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -24,7 +24,7 @@ 
 #include <stddef.h>
 #include <stdint.h>
 
-void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src,
+void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src,
                                         ptrdiff_t stride_dst, ptrdiff_t stride_src,
                                         const int16_t *sao_offset_val, int sao_left_class,
                                         int width, int height);
@@ -33,4 +33,8 @@  void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrd
 void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
                                         const int16_t *sao_offset_val, int eo, int width, int height);
 
+void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
+                                         const int16_t *sao_offset_val, int eo, int width, int height);
+void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
+                                       const int16_t *sao_offset_val, int eo, int width, int height);
 #endif
diff --git a/libavcodec/aarch64/h26x/sao_neon.S b/libavcodec/aarch64/h26x/sao_neon.S
index dc407484de..c43820135e 100644
--- a/libavcodec/aarch64/h26x/sao_neon.S
+++ b/libavcodec/aarch64/h26x/sao_neon.S
@@ -24,15 +24,17 @@ 
 
 #include "libavutil/aarch64/asm.S"
 
-#define MAX_PB_SIZE 64
+#define HEVC_MAX_PB_SIZE 64
+#define VVC_MAX_PB_SIZE 128
 #define AV_INPUT_BUFFER_PADDING_SIZE 64
-#define SAO_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE)
+#define HEVC_SAO_STRIDE (2 * HEVC_MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE)
+#define VVC_SAO_STRIDE (2 * VVC_MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE)
 
 // void sao_band_filter(uint8_t *_dst, uint8_t *_src,
 //                      ptrdiff_t stride_dst, ptrdiff_t stride_src,
 //                      int16_t *sao_offset_val, int sao_left_class,
 //                      int width, int height)
-function ff_hevc_sao_band_filter_8x8_8_neon, export=1
+function ff_h26x_sao_band_filter_8x8_8_neon, export=1
         stp             xzr, xzr, [sp, #-64]!
         stp             xzr, xzr, [sp, #16]
         stp             xzr, xzr, [sp, #32]
@@ -79,16 +81,30 @@  function ff_hevc_sao_band_filter_8x8_8_neon, export=1
         ret
 endfunc
 
-.Lsao_edge_pos:
+.Lhevc_sao_edge_pos:
 .word 1 // horizontal
-.word SAO_STRIDE // vertical
-.word SAO_STRIDE + 1 // 45 degree
-.word SAO_STRIDE - 1 // 135 degree
+.word HEVC_SAO_STRIDE // vertical
+.word HEVC_SAO_STRIDE + 1 // 45 degree
+.word HEVC_SAO_STRIDE - 1 // 135 degree
+
+.Lvvc_sao_edge_pos:
+.word 1 // horizontal
+.word VVC_SAO_STRIDE // vertical
+.word VVC_SAO_STRIDE + 1 // 45 degree
+.word VVC_SAO_STRIDE - 1 // 135 degree
+
+function ff_vvc_sao_edge_filter_16x16_8_neon, export=1
+        adr             x7, .Lvvc_sao_edge_pos
+        mov             x15, #VVC_SAO_STRIDE
+        b               1f
+endfunc
 
 // ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff stride_dst,
 //                                      int16 *sao_offset_val, int eo, int width, int height)
 function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
-        adr             x7, .Lsao_edge_pos
+        adr             x7, .Lhevc_sao_edge_pos
+        mov             x15, #HEVC_SAO_STRIDE
+1:
         ld1             {v3.8h}, [x3]              // load sao_offset_val
         add             w5,  w5,  #0xF
         bic             w5,  w5,  #0xF
@@ -101,7 +117,6 @@  function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
         uzp2            v1.16b, v3.16b, v3.16b     // sao_offset_val -> upper
         uzp1            v0.16b, v3.16b, v3.16b     // sao_offset_val -> lower
         movi            v2.16b, #2
-        mov             x15, #SAO_STRIDE
         // strides between end of line and next src/dst
         sub             x15, x15, x5               // stride_src - width
         sub             x16, x2, x5                // stride_dst - width
@@ -145,10 +160,18 @@  function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_sao_edge_filter_8x8_8_neon, export=1
+        adr             x7, .Lvvc_sao_edge_pos
+        mov             x15, #VVC_SAO_STRIDE
+        b               1f
+endfunc
+
 // ff_hevc_sao_edge_filter_8x8_8_neon(char *dst, char *src, ptrdiff stride_dst,
 //                                    int16 *sao_offset_val, int eo, int width, int height)
 function ff_hevc_sao_edge_filter_8x8_8_neon, export=1
-        adr             x7, .Lsao_edge_pos
+        adr             x7, .Lhevc_sao_edge_pos
+        mov             x15, #HEVC_SAO_STRIDE
+1:
         ldr             w4, [x7, w4, uxtw #2]
         ld1             {v3.8h}, [x3]
         mov             v3.h[7], v3.h[0]
@@ -160,7 +183,6 @@  function ff_hevc_sao_edge_filter_8x8_8_neon, export=1
         movi            v2.16b, #2
         add             x16, x0, x2
         lsl             x2,  x2, #1
-        mov             x15, #SAO_STRIDE
         mov             x8,  x1
         sub             x9,  x1, x4
         add             x10, x1, x4
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 7efae0f740..a90da0246e 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -384,7 +384,7 @@  av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
         c->sao_band_filter[1]          =
         c->sao_band_filter[2]          =
         c->sao_band_filter[3]          =
-        c->sao_band_filter[4]          = ff_hevc_sao_band_filter_8x8_8_neon;
+        c->sao_band_filter[4]          = ff_h26x_sao_band_filter_8x8_8_neon;
         c->sao_edge_filter[0]          = ff_hevc_sao_edge_filter_8x8_8_neon;
         c->sao_edge_filter[1]          =
         c->sao_edge_filter[2]          =
diff --git a/libavcodec/aarch64/vvc/Makefile b/libavcodec/aarch64/vvc/Makefile
index 58398d6e3d..54c49fea92 100644
--- a/libavcodec/aarch64/vvc/Makefile
+++ b/libavcodec/aarch64/vvc/Makefile
@@ -1,5 +1,6 @@ 
 clean::
 	$(RM) $(CLEANSUFFIXES:%=libavcodec/aarch64/vvc/%)
 
-OBJS-$(CONFIG_VVC_DECODER)				+= aarch64/vvc/dsp_init.o
-NEON-OBJS-$(CONFIG_VVC_DECODER)			+= aarch64/vvc/alf.o
+OBJS-$(CONFIG_VVC_DECODER)              += aarch64/vvc/dsp_init.o
+NEON-OBJS-$(CONFIG_VVC_DECODER)         += aarch64/vvc/alf.o \
+                                           aarch64/h26x/sao_neon.o
diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c
index 2a9f25911f..0aac140a8f 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -22,6 +22,7 @@ 
 
 #include "libavutil/cpu.h"
 #include "libavutil/aarch64/cpu.h"
+#include "libavcodec/aarch64/h26x/dsp.h"
 #include "libavcodec/vvc/dsp.h"
 #include "libavcodec/vvc/dec.h"
 #include "libavcodec/vvc/ctu.h"
@@ -45,6 +46,11 @@  void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
         return;
 
     if (bd == 8) {
+        for (int i = 0; i < FF_ARRAY_ELEMS(c->sao.band_filter); i++)
+            c->sao.band_filter[i] = ff_h26x_sao_band_filter_8x8_8_neon;
+        c->sao.edge_filter[0] = ff_vvc_sao_edge_filter_8x8_8_neon;
+        for (int i = 1; i < FF_ARRAY_ELEMS(c->sao.edge_filter); i++)
+            c->sao.edge_filter[i] = ff_vvc_sao_edge_filter_16x16_8_neon;
         c->alf.filter[LUMA] = alf_filter_luma_8_neon;
         c->alf.filter[CHROMA] = alf_filter_chroma_8_neon;
     } else if (bd == 10) {