diff mbox series

[FFmpeg-devel,v2,04/14] aarch64/vvc: Add put_pel/put_pel_uni/put_pel_uni_w

Message ID tencent_D5875311DC73CBB3A60750BF26D827DE2A06@qq.com
State New
Headers show
Series aarch64/vvc: Add SIMD | expand

Commit Message

Zhao Zhili Sept. 11, 2024, 6:06 p.m. UTC
From: Zhao Zhili <zhilizhao@tencent.com>

put_luma_pixels_8_4x4_c:                                 0.2 ( 1.00x)
put_luma_pixels_8_4x4_neon:                              0.2 ( 1.00x)
put_luma_pixels_8_8x8_c:                                 0.7 ( 1.00x)
put_luma_pixels_8_8x8_neon:                              0.2 ( 3.22x)
put_luma_pixels_8_16x16_c:                               2.2 ( 1.00x)
put_luma_pixels_8_16x16_neon:                            0.2 ( 9.89x)
put_luma_pixels_8_32x32_c:                               8.2 ( 1.00x)
put_luma_pixels_8_32x32_neon:                            1.2 ( 6.71x)
put_luma_pixels_8_64x64_c:                              33.7 ( 1.00x)
put_luma_pixels_8_64x64_neon:                            2.5 (13.63x)
put_luma_pixels_8_128x128_c:                           145.5 ( 1.00x)
put_luma_pixels_8_128x128_neon:                         10.2 (14.23x)
put_uni_pixels_luma_8_4x4_c:                             0.5 ( 1.00x)
put_uni_pixels_luma_8_4x4_neon:                          0.0 ( 0.00x)
put_uni_pixels_luma_8_8x8_c:                             0.5 ( 1.00x)
put_uni_pixels_luma_8_8x8_neon:                          0.2 ( 2.11x)
put_uni_pixels_luma_8_16x16_c:                           1.2 ( 1.00x)
put_uni_pixels_luma_8_16x16_neon:                        0.2 ( 5.44x)
put_uni_pixels_luma_8_32x32_c:                           3.0 ( 1.00x)
put_uni_pixels_luma_8_32x32_neon:                        0.5 ( 6.26x)
put_uni_pixels_luma_8_64x64_c:                           3.0 ( 1.00x)
put_uni_pixels_luma_8_64x64_neon:                        1.7 ( 1.72x)
put_uni_pixels_luma_8_128x128_c:                         6.5 ( 1.00x)
put_uni_pixels_luma_8_128x128_neon:                      6.5 ( 1.00x)
---
 libavcodec/aarch64/h26x/dsp.h       |  22 ++++
 libavcodec/aarch64/h26x/epel_neon.S | 189 +++++++++++++++++-----------
 libavcodec/aarch64/h26x/qpel_neon.S |  81 +++++++++++-
 libavcodec/aarch64/vvc/Makefile     |   1 +
 libavcodec/aarch64/vvc/dsp_init.c   |  21 ++++
 5 files changed, 241 insertions(+), 73 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index f72746ce03..076d01b477 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -248,4 +248,26 @@  NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _src
 NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
         ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),)
 
+#undef NEON8_FNPROTO_PARTIAL_6 /* discard any earlier definition so the name can be reused for the VVC prototypes below */
+#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
+    void ff_vvc_put_##fn##4_8_neon##ext args; \
+    void ff_vvc_put_##fn##8_8_neon##ext args; \
+    void ff_vvc_put_##fn##16_8_neon##ext args; \
+    void ff_vvc_put_##fn##32_8_neon##ext args; \
+    void ff_vvc_put_##fn##64_8_neon##ext args; \
+    void ff_vvc_put_##fn##128_8_neon##ext args /* no trailing ';' here: each invocation below supplies it */
+
+NEON8_FNPROTO_PARTIAL_6(pel_pixels, (int16_t *dst,
+        const uint8_t *src, ptrdiff_t srcstride, int height,
+        const int8_t *hf, const int8_t *vf, int width),); /* declares ff_vvc_put_pel_pixels{4,8,16,32,64,128}_8_neon (empty ext suffix) */
+
+NEON8_FNPROTO_PARTIAL_6(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
+        const uint8_t *_src, ptrdiff_t _srcstride, int height,
+        const int8_t *hf, const int8_t *vf, int width),); /* declares ff_vvc_put_pel_uni_pixels{4,8,16,32,64,128}_8_neon */
+
+NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
+        const uint8_t *_src, ptrdiff_t _srcstride,
+        int height, int denom, int wx, int ox,
+        const int8_t *hf, const int8_t *vf, int width),); /* declares ff_vvc_put_pel_uni_w_pixels{4,8,16,32,64,128}_8_neon */
+
 #endif
diff --git a/libavcodec/aarch64/h26x/epel_neon.S b/libavcodec/aarch64/h26x/epel_neon.S
index 378b0f7fb2..8ca42a5c3a 100644
--- a/libavcodec/aarch64/h26x/epel_neon.S
+++ b/libavcodec/aarch64/h26x/epel_neon.S
@@ -19,7 +19,8 @@ 
  */
 
 #include "libavutil/aarch64/asm.S"
-#define MAX_PB_SIZE 64
+#define HEVC_MAX_PB_SIZE 64
+#define VVC_MAX_PB_SIZE 128
 
 const epel_filters, align=4
         .byte  0,  0,  0,  0
@@ -131,8 +132,13 @@  endconst
         b.ne            1b
 .endm
 
+function ff_vvc_put_pel_pixels4_8_neon, export=1
+        mov             x7, #(VVC_MAX_PB_SIZE * 2) // x7 = 256 (VVC_MAX_PB_SIZE is 128); presumably the dst row step used by the shared loop -- confirm in the HEVC body
+        b               1f // VVC stub: resume at the next "1:" label, i.e. the loop of ff_hevc_put_hevc_pel_pixels4_8_neon defined right after it
+endfunc
+
 function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1
-        mov             x7, #(MAX_PB_SIZE * 2)
+        mov             x7, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.s}[0], [x1], x2
         ushll           v4.8h, v0.8b, #6
         subs            w3, w3, #1
@@ -142,7 +148,7 @@  function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1
-        mov             x7, #(MAX_PB_SIZE * 2 - 8)
+        mov             x7, #(HEVC_MAX_PB_SIZE * 2 - 8)
 1:      ld1             {v0.8b}, [x1], x2
         ushll           v4.8h, v0.8b, #6
         st1             {v4.d}[0], [x0], #8
@@ -152,8 +158,13 @@  function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_pixels8_8_neon, export=1
+        mov             x7, #(VVC_MAX_PB_SIZE * 2) // x7 = 256 (VVC_MAX_PB_SIZE is 128); presumably the dst row step used by the shared loop -- confirm in the HEVC body
+        b               1f // VVC stub: resume at the next "1:" label, i.e. the loop of ff_hevc_put_hevc_pel_pixels8_8_neon defined right after it
+endfunc
+
 function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1
-        mov             x7, #(MAX_PB_SIZE * 2)
+        mov             x7, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.8b}, [x1], x2
         ushll           v4.8h, v0.8b, #6
         subs            w3, w3, #1
@@ -163,7 +174,7 @@  function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1
-        mov             x7, #(MAX_PB_SIZE * 2 - 16)
+        mov             x7, #(HEVC_MAX_PB_SIZE * 2 - 16)
 1:      ld1             {v0.8b, v1.8b}, [x1], x2
         ushll           v4.8h, v0.8b, #6
         st1             {v4.8h}, [x0], #16
@@ -174,8 +185,13 @@  function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_pixels16_8_neon, export=1
+        mov             x7, #(VVC_MAX_PB_SIZE * 2) // x7 = 256 (VVC_MAX_PB_SIZE is 128); presumably the dst row step used by the shared loop -- confirm in the HEVC body
+        b               1f // VVC stub: resume at the next "1:" label, i.e. the loop of ff_hevc_put_hevc_pel_pixels16_8_neon defined right after it
+endfunc
+
 function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1
-        mov             x7, #(MAX_PB_SIZE * 2)
+        mov             x7, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.8b, v1.8b}, [x1], x2
         ushll           v4.8h, v0.8b, #6
         ushll           v5.8h, v1.8b, #6
@@ -186,7 +202,7 @@  function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1
-        mov             x7, #(MAX_PB_SIZE * 2)
+        mov             x7, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.8b-v2.8b}, [x1], x2
         ushll           v4.8h, v0.8b, #6
         ushll           v5.8h, v1.8b, #6
@@ -197,8 +213,13 @@  function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_pixels32_8_neon, export=1
+        mov             x7, #(VVC_MAX_PB_SIZE * 2) // x7 = 256 (VVC_MAX_PB_SIZE is 128); presumably the dst row step used by the shared loop -- confirm in the HEVC body
+        b               1f // VVC stub: resume at the next "1:" label, i.e. the loop of ff_hevc_put_hevc_pel_pixels32_8_neon defined right after it
+endfunc
+
 function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1
-        mov             x7, #(MAX_PB_SIZE * 2)
+        mov             x7, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.8b-v3.8b}, [x1], x2
         ushll           v4.8h, v0.8b, #6
         ushll           v5.8h, v1.8b, #6
@@ -211,7 +232,7 @@  function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1
-        mov             x7, #(MAX_PB_SIZE)
+        mov             x7, #(HEVC_MAX_PB_SIZE)
 1:      ld1             {v0.16b-v2.16b}, [x1], x2
         ushll           v4.8h, v0.8b, #6
         ushll2          v5.8h, v0.16b, #6
@@ -226,26 +247,50 @@  function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1
         ret
 endfunc
 
-function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
-1:      ld1             {v0.16b-v3.16b}, [x1], x2
+.macro put_pel_pixels64_8_neon
         ushll           v4.8h, v0.8b, #6
         ushll2          v5.8h, v0.16b, #6
         ushll           v6.8h, v1.8b, #6
         ushll2          v7.8h, v1.16b, #6
-        st1             {v4.8h-v7.8h}, [x0], #(MAX_PB_SIZE)
+        st1             {v4.8h-v7.8h}, [x0], #64 // store the 32 widened values from v0/v1; the literal 64 is the byte count just written, no longer tied to a codec constant
         ushll           v16.8h, v2.8b, #6
         ushll2          v17.8h, v2.16b, #6
         ushll           v18.8h, v3.8b, #6
         ushll2          v19.8h, v3.16b, #6
+        st1             {v16.8h-v19.8h}, [x0], x7 // store the 32 widened values from v2/v3, then advance x0 by the caller-chosen x7
+.endm // inputs: v0-v3 (64 source bytes), x0 (dst), x7 (second post-increment); writes v4-v7, v16-v19 and x0
+
+function ff_vvc_put_pel_pixels64_8_neon, export=1
+        mov             x7, #(2 * VVC_MAX_PB_SIZE - 64) // x7 = 192: added to the macro's fixed #64 step, x0 advances 256 bytes per row
+        b               1f // VVC stub: enter the row loop at the "1:" label of ff_hevc_put_hevc_pel_pixels64_8_neon defined right after it
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
+        mov             x7, #(HEVC_MAX_PB_SIZE) // x7 = 64: 64 + 64 = 128 bytes per row, the same total advance as the two #(MAX_PB_SIZE) stores being replaced
+1: // row loop; ff_vvc_put_pel_pixels64_8_neon branches here with its own x7
+        ld1             {v0.16b-v3.16b}, [x1], x2 // load 64 source bytes into v0-v3, then step x1 by x2
         subs            w3, w3, #1
-        st1             {v16.8h-v19.8h}, [x0], #(MAX_PB_SIZE)
+        put_pel_pixels64_8_neon
         b.ne            1b
         ret
 endfunc
 
+function ff_vvc_put_pel_pixels128_8_neon, export=1
+        mov             x7, #64 // each macro expansion advances x0 by 64 + x7 = 128 bytes; the two per row give 256
+1: // one iteration handles one 128-byte source row
+        mov             x6, x1 // x6 = cursor inside the current row (overwrites the incoming 7th argument, which is not read here)
+        ld1             {v0.16b-v3.16b}, [x6], #64 // first 64 source bytes of the row
+        add             x1, x1, x2 // x1 -> start of the next source row
+        subs            w3, w3, #1 // decrement the row counter in w3; the flags are consumed by b.ne below
+        put_pel_pixels64_8_neon
+        ld1             {v0.16b-v3.16b}, [x6], #64 // second 64 source bytes of the row
+        put_pel_pixels64_8_neon
+        b.ne            1b // loop until w3 reaches zero; the loads, shifts and stores since subs are not flag-setting
+        ret
+endfunc
 
 function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.s}[0], [x2], x3 // src
         ushll           v16.8h, v0.8b, #6
         ld1             {v20.4h}, [x4], x10 // src2
@@ -258,7 +303,7 @@  function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         sub             x1, x1, #4
 1:      ld1             {v0.8b}, [x2], x3
         ushll           v16.8h, v0.8b, #6
@@ -273,7 +318,7 @@  function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.8b}, [x2], x3    // src
         ushll           v16.8h, v0.8b, #6
         ld1             {v20.8h}, [x4], x10  // src2
@@ -286,7 +331,7 @@  function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         sub             x1, x1, #8
 1:      ld1             {v0.16b}, [x2], x3
         ushll           v16.8h, v0.8b, #6
@@ -304,7 +349,7 @@  function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.16b}, [x2], x3 // src
         ushll           v16.8h, v0.8b, #6
         ushll2          v17.8h, v0.16b, #6
@@ -320,7 +365,7 @@  function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.8b-v2.8b}, [x2], x3 // src
         ushll           v16.8h, v0.8b, #6
         ushll           v17.8h, v1.8b, #6
@@ -339,7 +384,7 @@  function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v0.16b-v1.16b}, [x2], x3 // src
         ushll           v16.8h, v0.8b, #6
         ushll2          v17.8h, v0.16b, #6
@@ -361,7 +406,7 @@  function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1
-        mov             x10, #(MAX_PB_SIZE)
+        mov             x10, #(HEVC_MAX_PB_SIZE)
 1:      ld1             {v0.16b-v2.16b}, [x2], x3 // src
         ushll           v16.8h, v0.8b, #6
         ushll2          v17.8h, v0.16b, #6
@@ -369,7 +414,7 @@  function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1
         ushll2          v19.8h, v1.16b, #6
         ushll           v20.8h, v2.8b, #6
         ushll2          v21.8h, v2.16b, #6
-        ld1             {v24.8h-v27.8h}, [x4], #(MAX_PB_SIZE) // src2
+        ld1             {v24.8h-v27.8h}, [x4], #(HEVC_MAX_PB_SIZE) // src2
         sqadd           v16.8h, v16.8h, v24.8h
         sqadd           v17.8h, v17.8h, v25.8h
         sqadd           v18.8h, v18.8h, v26.8h
@@ -399,12 +444,12 @@  function ff_hevc_put_hevc_pel_bi_pixels64_8_neon, export=1
         ushll2          v21.8h, v2.16b, #6
         ushll           v22.8h, v3.8b, #6
         ushll2          v23.8h, v3.16b, #6
-        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE) // src2
+        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(HEVC_MAX_PB_SIZE) // src2
         sqadd           v16.8h, v16.8h, v24.8h
         sqadd           v17.8h, v17.8h, v25.8h
         sqadd           v18.8h, v18.8h, v26.8h
         sqadd           v19.8h, v19.8h, v27.8h
-        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE)
+        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(HEVC_MAX_PB_SIZE)
         sqadd           v20.8h, v20.8h, v24.8h
         sqadd           v21.8h, v21.8h, v25.8h
         sqadd           v22.8h, v22.8h, v26.8h
@@ -427,7 +472,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_h4_8_neon, export=1
         load_epel_filterb x6, x7
         sub             x2, x2, #1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v4.8b}, [x2], x3
         ext             v5.8b, v4.8b, v4.8b, #1
         ext             v6.8b, v4.8b, v4.8b, #2
@@ -446,7 +491,7 @@  function ff_hevc_put_hevc_epel_bi_h6_8_neon, export=1
         load_epel_filterb x6, x7
         sub             w1, w1, #4
         sub             x2, x2, #1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v24.16b}, [x2], x3
         ext             v26.16b, v24.16b, v24.16b, #1
         ext             v27.16b, v24.16b, v24.16b, #2
@@ -465,7 +510,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_h8_8_neon, export=1
         load_epel_filterb x6, x7
         sub             x2, x2, #1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v24.16b}, [x2], x3
         ext             v26.16b, v24.16b, v24.16b, #1
         ext             v27.16b, v24.16b, v24.16b, #2
@@ -484,7 +529,7 @@  function ff_hevc_put_hevc_epel_bi_h12_8_neon, export=1
         load_epel_filterb x6, x7
         sub             x1, x1, #8
         sub             x2, x2, #1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v24.16b}, [x2], x3
         ext             v26.16b, v24.16b, v24.16b, #1
         ext             v27.16b, v24.16b, v24.16b, #2
@@ -506,7 +551,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_h16_8_neon, export=1
         load_epel_filterb x6, x7
         sub             x2, x2, #1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ldr             q24, [x2]
         ldr             s25, [x2, #16]
         add             x2, x2, x3
@@ -529,7 +574,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_h24_8_neon, export=1
         load_epel_filterb x6, x7
         sub             x2, x2, #1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ld1             {v24.16b, v25.16b}, [x2], x3
         ext             v26.16b, v24.16b, v25.16b, #1
         ext             v27.16b, v24.16b, v25.16b, #2
@@ -556,7 +601,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_h32_8_neon, export=1
         load_epel_filterb x6, x7
         sub             x2, x2, #1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 1:      ldp             q24, q25, [x2]
         ldr             s26, [x2, #32]
         add             x2, x2, x3
@@ -589,7 +634,7 @@  function ff_hevc_put_hevc_epel_bi_h48_8_neon, export=1
         load_epel_filterb x6, x7
         sub             x2, x2, #1
         mov             x7, #24
-        mov             x10, #(MAX_PB_SIZE * 2 - 48)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2 - 48)
 1:      ld1             {v24.16b, v25.16b, v26.16b}, [x2]
         ldr             s27, [x2, #48]
         add             x2, x2, x3
@@ -683,7 +728,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_v4_8_neon, export=1
         load_epel_filterb x7, x6
         sub             x2, x2, x3
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.s}[0], [x2], x3
         ld1             {v17.s}[0], [x2], x3
         ld1             {v18.s}[0], [x2], x3
@@ -705,7 +750,7 @@  function ff_hevc_put_hevc_epel_bi_v6_8_neon, export=1
         load_epel_filterb x7, x6
         sub             x2, x2, x3
         sub             x1, x1, #4
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8b}, [x2], x3
         ld1             {v17.8b}, [x2], x3
         ld1             {v18.8b}, [x2], x3
@@ -727,7 +772,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_v8_8_neon, export=1
         load_epel_filterb x7, x6
         sub             x2, x2, x3
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8b}, [x2], x3
         ld1             {v17.8b}, [x2], x3
         ld1             {v18.8b}, [x2], x3
@@ -749,7 +794,7 @@  function ff_hevc_put_hevc_epel_bi_v12_8_neon, export=1
         load_epel_filterb x7, x6
         sub             x1, x1, #8
         sub             x2, x2, x3
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.16b}, [x2], x3
         ld1             {v17.16b}, [x2], x3
         ld1             {v18.16b}, [x2], x3
@@ -774,7 +819,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_v16_8_neon, export=1
         load_epel_filterb x7, x6
         sub             x2, x2, x3
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.16b}, [x2], x3
         ld1             {v17.16b}, [x2], x3
         ld1             {v18.16b}, [x2], x3
@@ -798,7 +843,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_v24_8_neon, export=1
         load_epel_filterb x7, x6
         sub             x2, x2, x3
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8b, v17.8b, v18.8b}, [x2], x3
         ld1             {v19.8b, v20.8b, v21.8b}, [x2], x3
         ld1             {v22.8b, v23.8b, v24.8b}, [x2], x3
@@ -825,7 +870,7 @@  endfunc
 function ff_hevc_put_hevc_epel_bi_v32_8_neon, export=1
         load_epel_filterb x7, x6
         sub             x2, x2, x3
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.16b, v17.16b}, [x2], x3
         ld1             {v18.16b, v19.16b}, [x2], x3
         ld1             {v20.16b, v21.16b}, [x2], x3
@@ -895,7 +940,7 @@  endfunc
 function ff_hevc_put_hevc_epel_v4_8_neon, export=1
         load_epel_filterb x5, x4
         sub             x1, x1, x2
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ldr             s16, [x1]
         ldr             s17, [x1, x2]
         add             x1, x1, x2, lsl #1
@@ -915,7 +960,7 @@  endfunc
 function ff_hevc_put_hevc_epel_v6_8_neon, export=1
         load_epel_filterb x5, x4
         sub             x1, x1, x2
-        mov             x10, #(MAX_PB_SIZE * 2 - 8)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2 - 8)
         ldr             d16, [x1]
         ldr             d17, [x1, x2]
         add             x1, x1, x2, lsl #1
@@ -936,7 +981,7 @@  endfunc
 function ff_hevc_put_hevc_epel_v8_8_neon, export=1
         load_epel_filterb x5, x4
         sub             x1, x1, x2
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ldr             d16, [x1]
         ldr             d17, [x1, x2]
         add             x1, x1, x2, lsl #1
@@ -956,7 +1001,7 @@  endfunc
 function ff_hevc_put_hevc_epel_v12_8_neon, export=1
         load_epel_filterb x5, x4
         sub             x1, x1, x2
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ldr             q16, [x1]
         ldr             q17, [x1, x2]
         add             x1, x1, x2, lsl #1
@@ -980,7 +1025,7 @@  endfunc
 function ff_hevc_put_hevc_epel_v16_8_neon, export=1
         load_epel_filterb x5, x4
         sub             x1, x1, x2
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ldr             q16, [x1]
         ldr             q17, [x1, x2]
         add             x1, x1, x2, lsl #1
@@ -1002,7 +1047,7 @@  endfunc
 function ff_hevc_put_hevc_epel_v24_8_neon, export=1
         load_epel_filterb x5, x4
         sub             x1, x1, x2
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8b, v17.8b, v18.8b}, [x1], x2
         ld1             {v19.8b, v20.8b, v21.8b}, [x1], x2
         ld1             {v22.8b, v23.8b, v24.8b}, [x1], x2
@@ -1025,7 +1070,7 @@  endfunc
 function ff_hevc_put_hevc_epel_v32_8_neon, export=1
         load_epel_filterb x5, x4
         sub             x1, x1, x2
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.16b, v17.16b}, [x1], x2
         ld1             {v18.16b, v19.16b}, [x1], x2
         ld1             {v20.16b, v21.16b}, [x1], x2
@@ -1327,7 +1372,7 @@  endfunc
         add             x5, x5, x4, lsl #2
         ld1r            {v30.4s}, [x5]
         sub             x1, x1, #1
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
 .endm
 
 function ff_hevc_put_hevc_epel_h4_8_neon, export=1
@@ -2179,7 +2224,7 @@  DISABLE_I8MM
 
 function hevc_put_hevc_epel_hv4_8_end_neon
         load_epel_filterh x5, x4
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ldr             d16, [sp]
         ldr             d17, [sp, x10]
         add             sp, sp, x10, lsl #1
@@ -2198,7 +2243,7 @@  endfunc
 function hevc_put_hevc_epel_hv6_8_end_neon
         load_epel_filterh x5, x4
         mov             x5, #120
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ldr             q16, [sp]
         ldr             q17, [sp, x10]
         add             sp, sp, x10, lsl #1
@@ -2218,7 +2263,7 @@  endfunc
 
 function hevc_put_hevc_epel_hv8_8_end_neon
         load_epel_filterh x5, x4
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ldr             q16, [sp]
         ldr             q17, [sp, x10]
         add             sp, sp, x10, lsl #1
@@ -2238,7 +2283,7 @@  endfunc
 function hevc_put_hevc_epel_hv12_8_end_neon
         load_epel_filterh x5, x4
         mov             x5, #112
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h}, [sp], x10
         ld1             {v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h}, [sp], x10
@@ -2258,7 +2303,7 @@  endfunc
 
 function hevc_put_hevc_epel_hv16_8_end_neon
         load_epel_filterh x5, x4
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h}, [sp], x10
         ld1             {v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h}, [sp], x10
@@ -2278,7 +2323,7 @@  endfunc
 
 function hevc_put_hevc_epel_hv24_8_end_neon
         load_epel_filterh x5, x4
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h, v18.8h}, [sp], x10
         ld1             {v19.8h, v20.8h, v21.8h}, [sp], x10
         ld1             {v22.8h, v23.8h, v24.8h}, [sp], x10
@@ -2462,7 +2507,7 @@  epel_hv neon
 
 function hevc_put_hevc_epel_uni_hv4_8_end_neon
         load_epel_filterh x6, x5
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.4h}, [sp], x10
         ld1             {v17.4h}, [sp], x10
         ld1             {v18.4h}, [sp], x10
@@ -2481,7 +2526,7 @@  endfunc
 function hevc_put_hevc_epel_uni_hv6_8_end_neon
         load_epel_filterh x6, x5
         sub             x1, x1, #4
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h}, [sp], x10
         ld1             {v17.8h}, [sp], x10
         ld1             {v18.8h}, [sp], x10
@@ -2501,7 +2546,7 @@  endfunc
 
 function hevc_put_hevc_epel_uni_hv8_8_end_neon
         load_epel_filterh x6, x5
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h}, [sp], x10
         ld1             {v17.8h}, [sp], x10
         ld1             {v18.8h}, [sp], x10
@@ -2521,7 +2566,7 @@  endfunc
 function hevc_put_hevc_epel_uni_hv12_8_end_neon
         load_epel_filterh x6, x5
         sub             x1, x1, #8
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h}, [sp], x10
         ld1             {v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h}, [sp], x10
@@ -2543,7 +2588,7 @@  endfunc
 
 function hevc_put_hevc_epel_uni_hv16_8_end_neon
         load_epel_filterh x6, x5
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h}, [sp], x10
         ld1             {v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h}, [sp], x10
@@ -2565,7 +2610,7 @@  endfunc
 
 function hevc_put_hevc_epel_uni_hv24_8_end_neon
         load_epel_filterh x6, x5
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h, v18.8h}, [sp], x10
         ld1             {v19.8h, v20.8h, v21.8h}, [sp], x10
         ld1             {v22.8h, v23.8h, v24.8h}, [sp], x10
@@ -3223,7 +3268,7 @@  DISABLE_I8MM
 
 function hevc_put_hevc_epel_uni_w_hv4_8_end_neon
         load_epel_filterh x6, x5
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.4h}, [sp], x10
         ld1             {v17.4h}, [sp], x10
         ld1             {v18.4h}, [sp], x10
@@ -3273,7 +3318,7 @@  endfunc
 function hevc_put_hevc_epel_uni_w_hv6_8_end_neon
         load_epel_filterh x6, x5
         sub             x1, x1, #4
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h}, [sp], x10
         ld1             {v17.8h}, [sp], x10
         ld1             {v18.8h}, [sp], x10
@@ -3326,7 +3371,7 @@  endfunc
 
 function hevc_put_hevc_epel_uni_w_hv8_8_end_neon
         load_epel_filterh x6, x5
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h}, [sp], x10
         ld1             {v17.8h}, [sp], x10
         ld1             {v18.8h}, [sp], x10
@@ -3376,7 +3421,7 @@  endfunc
 function hevc_put_hevc_epel_uni_w_hv12_8_end_neon
         load_epel_filterh x6, x5
         sub             x1, x1, #8
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h}, [sp], x10
         ld1             {v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h}, [sp], x10
@@ -3437,7 +3482,7 @@  endfunc
 
 function hevc_put_hevc_epel_uni_w_hv16_8_end_neon
         load_epel_filterh x6, x5
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h}, [sp], x10
         ld1             {v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h}, [sp], x10
@@ -3498,7 +3543,7 @@  endfunc
 
 function hevc_put_hevc_epel_uni_w_hv24_8_end_neon
         load_epel_filterh x6, x5
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h, v18.8h}, [sp], x10
         ld1             {v19.8h, v20.8h, v21.8h}, [sp], x10
         ld1             {v22.8h, v23.8h, v24.8h}, [sp], x10
@@ -3795,7 +3840,7 @@  epel_uni_w_hv neon
 
 function hevc_put_hevc_epel_bi_hv4_8_end_neon
         load_epel_filterh x7, x6
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.4h}, [sp], x10
         ld1             {v17.4h}, [sp], x10
         ld1             {v18.4h}, [sp], x10
@@ -3816,7 +3861,7 @@  endfunc
 function hevc_put_hevc_epel_bi_hv6_8_end_neon
         load_epel_filterh x7, x6
         sub             x1, x1, #4
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h}, [sp], x10
         ld1             {v17.8h}, [sp], x10
         ld1             {v18.8h}, [sp], x10
@@ -3838,7 +3883,7 @@  endfunc
 
 function hevc_put_hevc_epel_bi_hv8_8_end_neon
         load_epel_filterh x7, x6
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h}, [sp], x10
         ld1             {v17.8h}, [sp], x10
         ld1             {v18.8h}, [sp], x10
@@ -3860,7 +3905,7 @@  endfunc
 function hevc_put_hevc_epel_bi_hv12_8_end_neon
         load_epel_filterh x7, x6
         sub             x1, x1, #8
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h}, [sp], x10
         ld1             {v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h}, [sp], x10
@@ -3885,7 +3930,7 @@  endfunc
 
 function hevc_put_hevc_epel_bi_hv16_8_end_neon
         load_epel_filterh x7, x6
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h}, [sp], x10
         ld1             {v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h}, [sp], x10
@@ -3910,7 +3955,7 @@  endfunc
 
 function hevc_put_hevc_epel_bi_hv24_8_end_neon
         load_epel_filterh x7, x6
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h, v18.8h}, [sp], x10
         ld1             {v19.8h, v20.8h, v21.8h}, [sp], x10
         ld1             {v22.8h, v23.8h, v24.8h}, [sp], x10
@@ -3939,7 +3984,7 @@  endfunc
 
 function hevc_put_hevc_epel_bi_hv32_8_end_neon
         load_epel_filterh x7, x6
-        mov             x10, #(MAX_PB_SIZE * 2)
+        mov             x10, #(HEVC_MAX_PB_SIZE * 2)
         ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [sp], x10
         ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [sp], x10
         ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [sp], x10
diff --git a/libavcodec/aarch64/h26x/qpel_neon.S b/libavcodec/aarch64/h26x/qpel_neon.S
index a05009c9d6..47b3948f8b 100644
--- a/libavcodec/aarch64/h26x/qpel_neon.S
+++ b/libavcodec/aarch64/h26x/qpel_neon.S
@@ -1250,6 +1250,10 @@  function ff_hevc_put_hevc_qpel_bi_v64_8_neon, export=1
         b               X(ff_hevc_put_hevc_qpel_bi_v32_8_neon)
 endfunc
 
+function ff_vvc_put_pel_uni_pixels4_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_pixels4_8_neon)      // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
+
 function ff_hevc_put_hevc_pel_uni_pixels4_8_neon, export=1
 1:
         ldr             s0, [x2]
@@ -1278,6 +1282,10 @@  function ff_hevc_put_hevc_pel_uni_pixels6_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_uni_pixels8_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_pixels8_8_neon)      // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
+
 function ff_hevc_put_hevc_pel_uni_pixels8_8_neon, export=1
 1:
         ldr             d0, [x2]
@@ -1306,6 +1314,10 @@  function ff_hevc_put_hevc_pel_uni_pixels12_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_uni_pixels16_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_pixels16_8_neon)     // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
+
 function ff_hevc_put_hevc_pel_uni_pixels16_8_neon, export=1
 1:
         ldr             q0, [x2]
@@ -1328,6 +1340,10 @@  function ff_hevc_put_hevc_pel_uni_pixels24_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_uni_pixels32_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_pixels32_8_neon)     // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
+
 function ff_hevc_put_hevc_pel_uni_pixels32_8_neon, export=1
 1:
         ld1             {v0.16b, v1.16b}, [x2], x3
@@ -1346,6 +1362,10 @@  function ff_hevc_put_hevc_pel_uni_pixels48_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_uni_pixels64_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_pixels64_8_neon)     // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
+
 function ff_hevc_put_hevc_pel_uni_pixels64_8_neon, export=1
 1:
         ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], x3
@@ -1355,6 +1375,19 @@  function ff_hevc_put_hevc_pel_uni_pixels64_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_uni_pixels128_8_neon, export=1                  // copies 128 bytes per row from [x2] to [x0], w4 rows; assumes w4 >= 1 on entry
+        sub             x1, x1, #64                                     // x0 is already advanced by 64 inside the loop, so take that off its per-row step
+        sub             x3, x3, #64                                     // same correction for the per-row step applied to x2
+1:
+        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], #64     // first 64 bytes of the row
+        subs            w4, w4, #1                                      // one row consumed; ld1/st1 leave the flags alone, so b.ne below still sees this result
+        ld1             {v4.16b, v5.16b, v6.16b, v7.16b}, [x2], x3      // second 64 bytes; x2 now points at the next row
+        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #64     // write the first half unmodified
+        st1             {v4.16b, v5.16b, v6.16b, v7.16b}, [x0], x1      // write the second half; x0 now points at the next row
+        b.ne            1b                                              // repeat until w4 reaches zero
+        ret
+endfunc
+
 function ff_hevc_put_hevc_qpel_uni_v4_8_neon, export=1
         load_qpel_filterb x6, x5
         sub             x2, x2, x3, lsl #1
@@ -1528,6 +1561,10 @@  function ff_hevc_put_hevc_qpel_uni_v64_8_neon, export=1
         b               X(ff_hevc_put_hevc_qpel_uni_v16_8_neon)
 endfunc
 
+function ff_vvc_put_pel_uni_w_pixels4_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_w_pixels4_8_neon)    // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
+
 function ff_hevc_put_hevc_pel_uni_w_pixels4_8_neon, export=1
         mov             w10, #-6
         sub             w10, w10, w5
@@ -1598,6 +1635,10 @@  function ff_hevc_put_hevc_pel_uni_w_pixels6_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_uni_w_pixels8_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_w_pixels8_8_neon)    // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
+
 function ff_hevc_put_hevc_pel_uni_w_pixels8_8_neon, export=1
         mov             w10, #-6
         sub             w10, w10, w5
@@ -1741,7 +1782,9 @@  function ff_hevc_put_hevc_pel_uni_w_pixels16_8_neon, export=1
         ret
 endfunc
 
-
+function ff_vvc_put_pel_uni_w_pixels16_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_w_pixels16_8_neon)   // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
 
 function ff_hevc_put_hevc_pel_uni_w_pixels24_8_neon, export=1
         mov             w10, #-6
@@ -1803,6 +1846,9 @@  function ff_hevc_put_hevc_pel_uni_w_pixels32_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_uni_w_pixels32_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_w_pixels32_8_neon)   // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
 
 function ff_hevc_put_hevc_pel_uni_w_pixels48_8_neon, export=1
         mov             w10, #-6
@@ -1839,6 +1885,42 @@  function ff_hevc_put_hevc_pel_uni_w_pixels64_8_neon, export=1
         ret
 endfunc
 
+function ff_vvc_put_pel_uni_w_pixels64_8_neon, export=1
+        b               X(ff_hevc_put_hevc_pel_uni_w_pixels64_8_neon)   // plain branch (no link): the HEVC routine runs in place of this one, registers untouched
+endfunc
+
+// put_uni_w for 128-byte rows of 8-bit samples; w4 = row count (assumed >= 1).
+// Every row is processed as two 64-byte halves, so x1 and x3 are reduced by
+// 64 up front: the second post-increment of x0/x2 then lands on the next row.
+// v30 = w6, v31 = -6 - w5 and v29 = w7 are loop-invariant broadcasts,
+// presumably read by PEL_UNI_W_PIXEL_CALC (macro defined outside this hunk).
+function ff_vvc_put_pel_uni_w_pixels128_8_neon, export=1
+        mov             w10, #-6
+        sub             w10, w10, w5                    // w10 = -6 - w5
+        dup             v30.8h, w6
+        dup             v31.4s, w10
+        dup             v29.4s, w7
+        sub             x1, x1, #64                     // per-row step for x0, minus the fixed #64 advance
+        sub             x3, x3, #64                     // per-row step for x2, minus the fixed #64 advance
+1:
+        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], #64     // first half of the row
+        PEL_UNI_W_PIXEL_CALC v0, v4, v5, v16, v17, v18, v19
+        PEL_UNI_W_PIXEL_CALC v1, v6, v7, v20, v21, v22, v23
+        PEL_UNI_W_PIXEL_CALC v2, v4, v5, v16, v17, v18, v19
+        PEL_UNI_W_PIXEL_CALC v3, v6, v7, v20, v21, v22, v23
+        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #64
+
+        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], x3      // second half; x2 -> next row
+        subs            w4, w4, #1                      // NOTE(review): relies on the macro and st1 not touching flags before b.ne
+        PEL_UNI_W_PIXEL_CALC v0, v4, v5, v16, v17, v18, v19
+        PEL_UNI_W_PIXEL_CALC v1, v6, v7, v20, v21, v22, v23
+        PEL_UNI_W_PIXEL_CALC v2, v4, v5, v16, v17, v18, v19
+        PEL_UNI_W_PIXEL_CALC v3, v6, v7, v20, v21, v22, v23
+        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1      // x0 -> next row
+        b.ne            1b
+        ret
+endfunc
+
 .macro QPEL_UNI_W_V_HEADER
         ldur            x12, [sp, #8]          // my
         sub             x2, x2, x3, lsl #1
diff --git a/libavcodec/aarch64/vvc/Makefile b/libavcodec/aarch64/vvc/Makefile
index a5ad24dfc5..a1c1f03e27 100644
--- a/libavcodec/aarch64/vvc/Makefile
+++ b/libavcodec/aarch64/vvc/Makefile
@@ -3,5 +3,6 @@  clean::
 
 OBJS-$(CONFIG_VVC_DECODER)              += aarch64/vvc/dsp_init.o
 NEON-OBJS-$(CONFIG_VVC_DECODER)         += aarch64/vvc/alf.o \
+                                           aarch64/h26x/epel_neon.o \
                                            aarch64/h26x/qpel_neon.o \
                                            aarch64/h26x/sao_neon.o
diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c
index ea6245d9a3..457be8c725 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -46,6 +46,13 @@  void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
         return;
 
     if (bd == 8) {
+        c->inter.put[0][1][0][0] = ff_vvc_put_pel_pixels4_8_neon;
+        c->inter.put[0][2][0][0] = ff_vvc_put_pel_pixels8_8_neon;
+        c->inter.put[0][3][0][0] = ff_vvc_put_pel_pixels16_8_neon;
+        c->inter.put[0][4][0][0] = ff_vvc_put_pel_pixels32_8_neon;
+        c->inter.put[0][5][0][0] = ff_vvc_put_pel_pixels64_8_neon;
+        c->inter.put[0][6][0][0] = ff_vvc_put_pel_pixels128_8_neon;
+
         c->inter.put[0][1][0][1] = ff_vvc_put_qpel_h4_8_neon;
         c->inter.put[0][2][0][1] = ff_vvc_put_qpel_h8_8_neon;
         c->inter.put[0][3][0][1] = ff_vvc_put_qpel_h16_8_neon;
@@ -53,6 +60,13 @@  void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
         c->inter.put[0][5][0][1] =
         c->inter.put[0][6][0][1] = ff_vvc_put_qpel_h32_8_neon;
 
+        c->inter.put_uni[0][1][0][0] = ff_vvc_put_pel_uni_pixels4_8_neon;
+        c->inter.put_uni[0][2][0][0] = ff_vvc_put_pel_uni_pixels8_8_neon;
+        c->inter.put_uni[0][3][0][0] = ff_vvc_put_pel_uni_pixels16_8_neon;
+        c->inter.put_uni[0][4][0][0] = ff_vvc_put_pel_uni_pixels32_8_neon;
+        c->inter.put_uni[0][5][0][0] = ff_vvc_put_pel_uni_pixels64_8_neon;
+        c->inter.put_uni[0][6][0][0] = ff_vvc_put_pel_uni_pixels128_8_neon;
+
         c->inter.put_uni[0][1][0][1] = ff_vvc_put_qpel_uni_h4_8_neon;
         c->inter.put_uni[0][2][0][1] = ff_vvc_put_qpel_uni_h8_8_neon;
         c->inter.put_uni[0][3][0][1] = ff_vvc_put_qpel_uni_h16_8_neon;
@@ -60,6 +74,13 @@  void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
         c->inter.put_uni[0][5][0][1] =
         c->inter.put_uni[0][6][0][1] = ff_vvc_put_qpel_uni_h32_8_neon;
 
+        c->inter.put_uni_w[0][1][0][0] = ff_vvc_put_pel_uni_w_pixels4_8_neon;
+        c->inter.put_uni_w[0][2][0][0] = ff_vvc_put_pel_uni_w_pixels8_8_neon;
+        c->inter.put_uni_w[0][3][0][0] = ff_vvc_put_pel_uni_w_pixels16_8_neon;
+        c->inter.put_uni_w[0][4][0][0] = ff_vvc_put_pel_uni_w_pixels32_8_neon;
+        c->inter.put_uni_w[0][5][0][0] = ff_vvc_put_pel_uni_w_pixels64_8_neon;
+        c->inter.put_uni_w[0][6][0][0] = ff_vvc_put_pel_uni_w_pixels128_8_neon;
+
         for (int i = 0; i < FF_ARRAY_ELEMS(c->sao.band_filter); i++)
             c->sao.band_filter[i] = ff_h26x_sao_band_filter_8x8_8_neon;
         c->sao.edge_filter[0] = ff_vvc_sao_edge_filter_8x8_8_neon;