@@ -248,4 +248,26 @@ NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _src
NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),)
+#undef NEON8_FNPROTO_PARTIAL_6 /* drop any earlier definition before redefining it below */
+#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) /* declares ff_vvc_put_<fn><N>_8_neon<ext> for six size suffixes N */ \
+ void ff_vvc_put_##fn##4_8_neon##ext args; /* N = 4 */ \
+ void ff_vvc_put_##fn##8_8_neon##ext args; /* N = 8 */ \
+ void ff_vvc_put_##fn##16_8_neon##ext args; /* N = 16 */ \
+ void ff_vvc_put_##fn##32_8_neon##ext args; /* N = 32 */ \
+ void ff_vvc_put_##fn##64_8_neon##ext args; /* N = 64 */ \
+ void ff_vvc_put_##fn##128_8_neon##ext args /* N = 128; no ';' here, the invocation supplies it */
+
+NEON8_FNPROTO_PARTIAL_6(pel_pixels, (int16_t *dst,
+ const uint8_t *src, ptrdiff_t srcstride, int height,
+ const int8_t *hf, const int8_t *vf, int width),); /* declares ff_vvc_put_pel_pixels{4,8,16,32,64,128}_8_neon; empty ext = no name suffix */
+
+NEON8_FNPROTO_PARTIAL_6(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
+ const uint8_t *_src, ptrdiff_t _srcstride, int height,
+ const int8_t *hf, const int8_t *vf, int width),); /* declares ff_vvc_put_pel_uni_pixels{4,8,16,32,64,128}_8_neon */
+
+NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
+ const uint8_t *_src, ptrdiff_t _srcstride,
+ int height, int denom, int wx, int ox,
+ const int8_t *hf, const int8_t *vf, int width),); /* declares ff_vvc_put_pel_uni_w_pixels{4,8,16,32,64,128}_8_neon */
+
#endif
@@ -19,7 +19,8 @@
*/
#include "libavutil/aarch64/asm.S"
-#define MAX_PB_SIZE 64
+#define HEVC_MAX_PB_SIZE 64
+#define VVC_MAX_PB_SIZE 128
const epel_filters, align=4
.byte 0, 0, 0, 0
@@ -131,8 +132,13 @@ endconst
b.ne 1b
.endm
+function ff_vvc_put_pel_pixels4_8_neon, export=1
+ mov x7, #(VVC_MAX_PB_SIZE * 2) // VVC entry: x7 = 256 instead of the HEVC value 128 (presumably the dst row step in bytes - confirm against the store in the shared loop)
+ b 1f // continue at the next "1:" label, i.e. the loop of ff_hevc_put_hevc_pel_pixels4_8_neon, skipping its own mov x7
+endfunc
+
function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.s}[0], [x1], x2
ushll v4.8h, v0.8b, #6
subs w3, w3, #1
@@ -142,7 +148,7 @@ function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2 - 8)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2 - 8)
1: ld1 {v0.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
st1 {v4.d}[0], [x0], #8
@@ -152,8 +158,13 @@ function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_pixels8_8_neon, export=1
+ mov x7, #(VVC_MAX_PB_SIZE * 2) // VVC entry: x7 = 256 instead of the HEVC value 128 (presumably the dst row step in bytes - confirm against the store in the shared loop)
+ b 1f // continue at the next "1:" label, i.e. the loop of ff_hevc_put_hevc_pel_pixels8_8_neon, skipping its own mov x7
+endfunc
+
function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
subs w3, w3, #1
@@ -163,7 +174,7 @@ function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2 - 16)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2 - 16)
1: ld1 {v0.8b, v1.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
st1 {v4.8h}, [x0], #16
@@ -174,8 +185,13 @@ function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_pixels16_8_neon, export=1
+ mov x7, #(VVC_MAX_PB_SIZE * 2) // VVC entry: x7 = 256 instead of the HEVC value 128 (presumably the dst row step in bytes - confirm against the store in the shared loop)
+ b 1f // continue at the next "1:" label, i.e. the loop of ff_hevc_put_hevc_pel_pixels16_8_neon, skipping its own mov x7
+endfunc
+
function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b, v1.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
ushll v5.8h, v1.8b, #6
@@ -186,7 +202,7 @@ function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b-v2.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
ushll v5.8h, v1.8b, #6
@@ -197,8 +213,13 @@ function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_pixels32_8_neon, export=1
+ mov x7, #(VVC_MAX_PB_SIZE * 2) // VVC entry: x7 = 256 instead of the HEVC value 128 (presumably the dst row step in bytes - confirm against the store in the shared loop)
+ b 1f // continue at the next "1:" label, i.e. the loop of ff_hevc_put_hevc_pel_pixels32_8_neon, skipping its own mov x7
+endfunc
+
function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b-v3.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
ushll v5.8h, v1.8b, #6
@@ -211,7 +232,7 @@ function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1
- mov x7, #(MAX_PB_SIZE)
+ mov x7, #(HEVC_MAX_PB_SIZE)
1: ld1 {v0.16b-v2.16b}, [x1], x2
ushll v4.8h, v0.8b, #6
ushll2 v5.8h, v0.16b, #6
@@ -226,26 +247,50 @@ function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1
ret
endfunc
-function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
-1: ld1 {v0.16b-v3.16b}, [x1], x2
+.macro put_pel_pixels64_8_neon
 ushll v4.8h, v0.8b, #6
 ushll2 v5.8h, v0.16b, #6
 ushll v6.8h, v1.8b, #6
 ushll2 v7.8h, v1.16b, #6
- st1 {v4.8h-v7.8h}, [x0], #(MAX_PB_SIZE)
+ st1 {v4.8h-v7.8h}, [x0], #64 // macro body: widens the 64 bytes the caller loaded into v0-v3 to 16 bit, shifted left by 6; this store writes the first 32 results (64 bytes) and advances x0 by 64
 ushll v16.8h, v2.8b, #6
 ushll2 v17.8h, v2.16b, #6
 ushll v18.8h, v3.8b, #6
 ushll2 v19.8h, v3.16b, #6
- subs w3, w3, #1
- st1 {v16.8h-v19.8h}, [x0], #(MAX_PB_SIZE)
- b.ne 1b
+ st1 {v16.8h-v19.8h}, [x0], x7 // last 32 results; x0 then advances by the caller-chosen x7 (clobbers v4-v7, v16-v19)
+.endm
+
+function ff_vvc_put_pel_pixels64_8_neon, export=1
+ mov x7, #(2 * VVC_MAX_PB_SIZE - 64) // 192: added to the macro's fixed #64 step this moves dst by 256 bytes per row
+ b 1f // enter the loop at "1:" in ff_hevc_put_hevc_pel_pixels64_8_neon below, skipping its mov x7
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
+ mov x7, #(HEVC_MAX_PB_SIZE) // 64: added to the macro's fixed #64 step this moves dst by 128 bytes per row
+1:
+ ld1 {v0.16b-v3.16b}, [x1], x2 // loop head, also the target of ff_vvc_put_pel_pixels64_8_neon's "b 1f": load 64 source bytes, advance x1 by x2
+ sub w3, w3, #1 // w3 = rows still to process; plain sub, flags are not needed because the loop ends with cbnz
+ put_pel_pixels64_8_neon
+ cbnz w3, 1b // repeat until the row counter reaches zero
 ret
 endfunc
+function ff_vvc_put_pel_pixels128_8_neon, export=1
+ mov x7, #64 // second-store step of put_pel_pixels64_8_neon; two macro expansions per row advance x0 by 4 * 64 = 256 bytes
+1:
+ mov x6, x1 // x6 = read cursor inside the current source row
+ ld1 {v0.16b-v3.16b}, [x6], #64 // source bytes 0..63, cursor moves to byte 64
+ add x1, x1, x2 // x1 -> next source row
+ sub w3, w3, #1 // w3 = rows still to process
+ put_pel_pixels64_8_neon
+ ld1 {v0.16b-v3.16b}, [x6] // source bytes 64..127; no writeback needed, x6 is reloaded at 1:
+ put_pel_pixels64_8_neon
+ cbnz w3, 1b // repeat until the row counter reaches zero
+ ret
+endfunc
function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.s}[0], [x2], x3 // src
ushll v16.8h, v0.8b, #6
ld1 {v20.4h}, [x4], x10 // src2
@@ -258,7 +303,7 @@ function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
sub x1, x1, #4
1: ld1 {v0.8b}, [x2], x3
ushll v16.8h, v0.8b, #6
@@ -273,7 +318,7 @@ function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b}, [x2], x3 // src
ushll v16.8h, v0.8b, #6
ld1 {v20.8h}, [x4], x10 // src2
@@ -286,7 +331,7 @@ function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
sub x1, x1, #8
1: ld1 {v0.16b}, [x2], x3
ushll v16.8h, v0.8b, #6
@@ -304,7 +349,7 @@ function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.16b}, [x2], x3 // src
ushll v16.8h, v0.8b, #6
ushll2 v17.8h, v0.16b, #6
@@ -320,7 +365,7 @@ function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b-v2.8b}, [x2], x3 // src
ushll v16.8h, v0.8b, #6
ushll v17.8h, v1.8b, #6
@@ -339,7 +384,7 @@ function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.16b-v1.16b}, [x2], x3 // src
ushll v16.8h, v0.8b, #6
ushll2 v17.8h, v0.16b, #6
@@ -361,7 +406,7 @@ function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1
- mov x10, #(MAX_PB_SIZE)
+ mov x10, #(HEVC_MAX_PB_SIZE)
1: ld1 {v0.16b-v2.16b}, [x2], x3 // src
ushll v16.8h, v0.8b, #6
ushll2 v17.8h, v0.16b, #6
@@ -369,7 +414,7 @@ function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1
ushll2 v19.8h, v1.16b, #6
ushll v20.8h, v2.8b, #6
ushll2 v21.8h, v2.16b, #6
- ld1 {v24.8h-v27.8h}, [x4], #(MAX_PB_SIZE) // src2
+ ld1 {v24.8h-v27.8h}, [x4], #(HEVC_MAX_PB_SIZE) // src2
sqadd v16.8h, v16.8h, v24.8h
sqadd v17.8h, v17.8h, v25.8h
sqadd v18.8h, v18.8h, v26.8h
@@ -399,12 +444,12 @@ function ff_hevc_put_hevc_pel_bi_pixels64_8_neon, export=1
ushll2 v21.8h, v2.16b, #6
ushll v22.8h, v3.8b, #6
ushll2 v23.8h, v3.16b, #6
- ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE) // src2
+ ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(HEVC_MAX_PB_SIZE) // src2
sqadd v16.8h, v16.8h, v24.8h
sqadd v17.8h, v17.8h, v25.8h
sqadd v18.8h, v18.8h, v26.8h
sqadd v19.8h, v19.8h, v27.8h
- ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE)
+ ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(HEVC_MAX_PB_SIZE)
sqadd v20.8h, v20.8h, v24.8h
sqadd v21.8h, v21.8h, v25.8h
sqadd v22.8h, v22.8h, v26.8h
@@ -427,7 +472,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_h4_8_neon, export=1
load_epel_filterb x6, x7
sub x2, x2, #1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v4.8b}, [x2], x3
ext v5.8b, v4.8b, v4.8b, #1
ext v6.8b, v4.8b, v4.8b, #2
@@ -446,7 +491,7 @@ function ff_hevc_put_hevc_epel_bi_h6_8_neon, export=1
load_epel_filterb x6, x7
sub w1, w1, #4
sub x2, x2, #1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v24.16b}, [x2], x3
ext v26.16b, v24.16b, v24.16b, #1
ext v27.16b, v24.16b, v24.16b, #2
@@ -465,7 +510,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_h8_8_neon, export=1
load_epel_filterb x6, x7
sub x2, x2, #1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v24.16b}, [x2], x3
ext v26.16b, v24.16b, v24.16b, #1
ext v27.16b, v24.16b, v24.16b, #2
@@ -484,7 +529,7 @@ function ff_hevc_put_hevc_epel_bi_h12_8_neon, export=1
load_epel_filterb x6, x7
sub x1, x1, #8
sub x2, x2, #1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v24.16b}, [x2], x3
ext v26.16b, v24.16b, v24.16b, #1
ext v27.16b, v24.16b, v24.16b, #2
@@ -506,7 +551,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_h16_8_neon, export=1
load_epel_filterb x6, x7
sub x2, x2, #1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ldr q24, [x2]
ldr s25, [x2, #16]
add x2, x2, x3
@@ -529,7 +574,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_h24_8_neon, export=1
load_epel_filterb x6, x7
sub x2, x2, #1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v24.16b, v25.16b}, [x2], x3
ext v26.16b, v24.16b, v25.16b, #1
ext v27.16b, v24.16b, v25.16b, #2
@@ -556,7 +601,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_h32_8_neon, export=1
load_epel_filterb x6, x7
sub x2, x2, #1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
1: ldp q24, q25, [x2]
ldr s26, [x2, #32]
add x2, x2, x3
@@ -589,7 +634,7 @@ function ff_hevc_put_hevc_epel_bi_h48_8_neon, export=1
load_epel_filterb x6, x7
sub x2, x2, #1
mov x7, #24
- mov x10, #(MAX_PB_SIZE * 2 - 48)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2 - 48)
1: ld1 {v24.16b, v25.16b, v26.16b}, [x2]
ldr s27, [x2, #48]
add x2, x2, x3
@@ -683,7 +728,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_v4_8_neon, export=1
load_epel_filterb x7, x6
sub x2, x2, x3
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.s}[0], [x2], x3
ld1 {v17.s}[0], [x2], x3
ld1 {v18.s}[0], [x2], x3
@@ -705,7 +750,7 @@ function ff_hevc_put_hevc_epel_bi_v6_8_neon, export=1
load_epel_filterb x7, x6
sub x2, x2, x3
sub x1, x1, #4
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8b}, [x2], x3
ld1 {v17.8b}, [x2], x3
ld1 {v18.8b}, [x2], x3
@@ -727,7 +772,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_v8_8_neon, export=1
load_epel_filterb x7, x6
sub x2, x2, x3
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8b}, [x2], x3
ld1 {v17.8b}, [x2], x3
ld1 {v18.8b}, [x2], x3
@@ -749,7 +794,7 @@ function ff_hevc_put_hevc_epel_bi_v12_8_neon, export=1
load_epel_filterb x7, x6
sub x1, x1, #8
sub x2, x2, x3
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.16b}, [x2], x3
ld1 {v17.16b}, [x2], x3
ld1 {v18.16b}, [x2], x3
@@ -774,7 +819,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_v16_8_neon, export=1
load_epel_filterb x7, x6
sub x2, x2, x3
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.16b}, [x2], x3
ld1 {v17.16b}, [x2], x3
ld1 {v18.16b}, [x2], x3
@@ -798,7 +843,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_v24_8_neon, export=1
load_epel_filterb x7, x6
sub x2, x2, x3
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8b, v17.8b, v18.8b}, [x2], x3
ld1 {v19.8b, v20.8b, v21.8b}, [x2], x3
ld1 {v22.8b, v23.8b, v24.8b}, [x2], x3
@@ -825,7 +870,7 @@ endfunc
function ff_hevc_put_hevc_epel_bi_v32_8_neon, export=1
load_epel_filterb x7, x6
sub x2, x2, x3
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.16b, v17.16b}, [x2], x3
ld1 {v18.16b, v19.16b}, [x2], x3
ld1 {v20.16b, v21.16b}, [x2], x3
@@ -895,7 +940,7 @@ endfunc
function ff_hevc_put_hevc_epel_v4_8_neon, export=1
load_epel_filterb x5, x4
sub x1, x1, x2
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ldr s16, [x1]
ldr s17, [x1, x2]
add x1, x1, x2, lsl #1
@@ -915,7 +960,7 @@ endfunc
function ff_hevc_put_hevc_epel_v6_8_neon, export=1
load_epel_filterb x5, x4
sub x1, x1, x2
- mov x10, #(MAX_PB_SIZE * 2 - 8)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2 - 8)
ldr d16, [x1]
ldr d17, [x1, x2]
add x1, x1, x2, lsl #1
@@ -936,7 +981,7 @@ endfunc
function ff_hevc_put_hevc_epel_v8_8_neon, export=1
load_epel_filterb x5, x4
sub x1, x1, x2
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ldr d16, [x1]
ldr d17, [x1, x2]
add x1, x1, x2, lsl #1
@@ -956,7 +1001,7 @@ endfunc
function ff_hevc_put_hevc_epel_v12_8_neon, export=1
load_epel_filterb x5, x4
sub x1, x1, x2
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ldr q16, [x1]
ldr q17, [x1, x2]
add x1, x1, x2, lsl #1
@@ -980,7 +1025,7 @@ endfunc
function ff_hevc_put_hevc_epel_v16_8_neon, export=1
load_epel_filterb x5, x4
sub x1, x1, x2
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ldr q16, [x1]
ldr q17, [x1, x2]
add x1, x1, x2, lsl #1
@@ -1002,7 +1047,7 @@ endfunc
function ff_hevc_put_hevc_epel_v24_8_neon, export=1
load_epel_filterb x5, x4
sub x1, x1, x2
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8b, v17.8b, v18.8b}, [x1], x2
ld1 {v19.8b, v20.8b, v21.8b}, [x1], x2
ld1 {v22.8b, v23.8b, v24.8b}, [x1], x2
@@ -1025,7 +1070,7 @@ endfunc
function ff_hevc_put_hevc_epel_v32_8_neon, export=1
load_epel_filterb x5, x4
sub x1, x1, x2
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.16b, v17.16b}, [x1], x2
ld1 {v18.16b, v19.16b}, [x1], x2
ld1 {v20.16b, v21.16b}, [x1], x2
@@ -1327,7 +1372,7 @@ endfunc
add x5, x5, x4, lsl #2
ld1r {v30.4s}, [x5]
sub x1, x1, #1
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
.endm
function ff_hevc_put_hevc_epel_h4_8_neon, export=1
@@ -2179,7 +2224,7 @@ DISABLE_I8MM
function hevc_put_hevc_epel_hv4_8_end_neon
load_epel_filterh x5, x4
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ldr d16, [sp]
ldr d17, [sp, x10]
add sp, sp, x10, lsl #1
@@ -2198,7 +2243,7 @@ endfunc
function hevc_put_hevc_epel_hv6_8_end_neon
load_epel_filterh x5, x4
mov x5, #120
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ldr q16, [sp]
ldr q17, [sp, x10]
add sp, sp, x10, lsl #1
@@ -2218,7 +2263,7 @@ endfunc
function hevc_put_hevc_epel_hv8_8_end_neon
load_epel_filterh x5, x4
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ldr q16, [sp]
ldr q17, [sp, x10]
add sp, sp, x10, lsl #1
@@ -2238,7 +2283,7 @@ endfunc
function hevc_put_hevc_epel_hv12_8_end_neon
load_epel_filterh x5, x4
mov x5, #112
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h}, [sp], x10
ld1 {v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h}, [sp], x10
@@ -2258,7 +2303,7 @@ endfunc
function hevc_put_hevc_epel_hv16_8_end_neon
load_epel_filterh x5, x4
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h}, [sp], x10
ld1 {v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h}, [sp], x10
@@ -2278,7 +2323,7 @@ endfunc
function hevc_put_hevc_epel_hv24_8_end_neon
load_epel_filterh x5, x4
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10
@@ -2462,7 +2507,7 @@ epel_hv neon
function hevc_put_hevc_epel_uni_hv4_8_end_neon
load_epel_filterh x6, x5
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.4h}, [sp], x10
ld1 {v17.4h}, [sp], x10
ld1 {v18.4h}, [sp], x10
@@ -2481,7 +2526,7 @@ endfunc
function hevc_put_hevc_epel_uni_hv6_8_end_neon
load_epel_filterh x6, x5
sub x1, x1, #4
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h}, [sp], x10
ld1 {v17.8h}, [sp], x10
ld1 {v18.8h}, [sp], x10
@@ -2501,7 +2546,7 @@ endfunc
function hevc_put_hevc_epel_uni_hv8_8_end_neon
load_epel_filterh x6, x5
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h}, [sp], x10
ld1 {v17.8h}, [sp], x10
ld1 {v18.8h}, [sp], x10
@@ -2521,7 +2566,7 @@ endfunc
function hevc_put_hevc_epel_uni_hv12_8_end_neon
load_epel_filterh x6, x5
sub x1, x1, #8
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h}, [sp], x10
ld1 {v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h}, [sp], x10
@@ -2543,7 +2588,7 @@ endfunc
function hevc_put_hevc_epel_uni_hv16_8_end_neon
load_epel_filterh x6, x5
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h}, [sp], x10
ld1 {v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h}, [sp], x10
@@ -2565,7 +2610,7 @@ endfunc
function hevc_put_hevc_epel_uni_hv24_8_end_neon
load_epel_filterh x6, x5
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10
@@ -3223,7 +3268,7 @@ DISABLE_I8MM
function hevc_put_hevc_epel_uni_w_hv4_8_end_neon
load_epel_filterh x6, x5
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.4h}, [sp], x10
ld1 {v17.4h}, [sp], x10
ld1 {v18.4h}, [sp], x10
@@ -3273,7 +3318,7 @@ endfunc
function hevc_put_hevc_epel_uni_w_hv6_8_end_neon
load_epel_filterh x6, x5
sub x1, x1, #4
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h}, [sp], x10
ld1 {v17.8h}, [sp], x10
ld1 {v18.8h}, [sp], x10
@@ -3326,7 +3371,7 @@ endfunc
function hevc_put_hevc_epel_uni_w_hv8_8_end_neon
load_epel_filterh x6, x5
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h}, [sp], x10
ld1 {v17.8h}, [sp], x10
ld1 {v18.8h}, [sp], x10
@@ -3376,7 +3421,7 @@ endfunc
function hevc_put_hevc_epel_uni_w_hv12_8_end_neon
load_epel_filterh x6, x5
sub x1, x1, #8
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h}, [sp], x10
ld1 {v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h}, [sp], x10
@@ -3437,7 +3482,7 @@ endfunc
function hevc_put_hevc_epel_uni_w_hv16_8_end_neon
load_epel_filterh x6, x5
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h}, [sp], x10
ld1 {v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h}, [sp], x10
@@ -3498,7 +3543,7 @@ endfunc
function hevc_put_hevc_epel_uni_w_hv24_8_end_neon
load_epel_filterh x6, x5
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10
@@ -3795,7 +3840,7 @@ epel_uni_w_hv neon
function hevc_put_hevc_epel_bi_hv4_8_end_neon
load_epel_filterh x7, x6
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.4h}, [sp], x10
ld1 {v17.4h}, [sp], x10
ld1 {v18.4h}, [sp], x10
@@ -3816,7 +3861,7 @@ endfunc
function hevc_put_hevc_epel_bi_hv6_8_end_neon
load_epel_filterh x7, x6
sub x1, x1, #4
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h}, [sp], x10
ld1 {v17.8h}, [sp], x10
ld1 {v18.8h}, [sp], x10
@@ -3838,7 +3883,7 @@ endfunc
function hevc_put_hevc_epel_bi_hv8_8_end_neon
load_epel_filterh x7, x6
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h}, [sp], x10
ld1 {v17.8h}, [sp], x10
ld1 {v18.8h}, [sp], x10
@@ -3860,7 +3905,7 @@ endfunc
function hevc_put_hevc_epel_bi_hv12_8_end_neon
load_epel_filterh x7, x6
sub x1, x1, #8
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h}, [sp], x10
ld1 {v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h}, [sp], x10
@@ -3885,7 +3930,7 @@ endfunc
function hevc_put_hevc_epel_bi_hv16_8_end_neon
load_epel_filterh x7, x6
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h}, [sp], x10
ld1 {v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h}, [sp], x10
@@ -3910,7 +3955,7 @@ endfunc
function hevc_put_hevc_epel_bi_hv24_8_end_neon
load_epel_filterh x7, x6
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h, v18.8h}, [sp], x10
ld1 {v19.8h, v20.8h, v21.8h}, [sp], x10
ld1 {v22.8h, v23.8h, v24.8h}, [sp], x10
@@ -3939,7 +3984,7 @@ endfunc
function hevc_put_hevc_epel_bi_hv32_8_end_neon
load_epel_filterh x7, x6
- mov x10, #(MAX_PB_SIZE * 2)
+ mov x10, #(HEVC_MAX_PB_SIZE * 2)
ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [sp], x10
ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [sp], x10
ld1 {v24.8h, v25.8h, v26.8h, v27.8h}, [sp], x10
@@ -1250,6 +1250,10 @@ function ff_hevc_put_hevc_qpel_bi_v64_8_neon, export=1
b X(ff_hevc_put_hevc_qpel_bi_v32_8_neon)
endfunc
+function ff_vvc_put_pel_uni_pixels4_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_pixels4_8_neon) // VVC symbol is a thin alias: tail-branch to the HEVC routine, registers untouched
+endfunc
+
function ff_hevc_put_hevc_pel_uni_pixels4_8_neon, export=1
1:
ldr s0, [x2]
@@ -1278,6 +1282,10 @@ function ff_hevc_put_hevc_pel_uni_pixels6_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_uni_pixels8_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_pixels8_8_neon) // VVC symbol is a thin alias: tail-branch to the HEVC routine, registers untouched
+endfunc
+
function ff_hevc_put_hevc_pel_uni_pixels8_8_neon, export=1
1:
ldr d0, [x2]
@@ -1306,6 +1314,10 @@ function ff_hevc_put_hevc_pel_uni_pixels12_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_uni_pixels16_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_pixels16_8_neon) // VVC symbol is a thin alias: tail-branch to the HEVC routine, registers untouched
+endfunc
+
function ff_hevc_put_hevc_pel_uni_pixels16_8_neon, export=1
1:
ldr q0, [x2]
@@ -1328,6 +1340,10 @@ function ff_hevc_put_hevc_pel_uni_pixels24_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_uni_pixels32_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_pixels32_8_neon) // VVC symbol is a thin alias: tail-branch to the HEVC routine, registers untouched
+endfunc
+
function ff_hevc_put_hevc_pel_uni_pixels32_8_neon, export=1
1:
ld1 {v0.16b, v1.16b}, [x2], x3
@@ -1346,6 +1362,10 @@ function ff_hevc_put_hevc_pel_uni_pixels48_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_uni_pixels64_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_pixels64_8_neon) // VVC symbol is a thin alias: tail-branch to the HEVC routine, registers untouched
+endfunc
+
function ff_hevc_put_hevc_pel_uni_pixels64_8_neon, export=1
1:
ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], x3
@@ -1355,6 +1375,21 @@ function ff_hevc_put_hevc_pel_uni_pixels64_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_uni_pixels128_8_neon, export=1
+1:
+ mov x5, x2 // plain 128-byte-per-row copy; x5 = read cursor inside the current source row
+ mov x6, x0 // x6 = write cursor inside the current dst row
+ ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x5], #64 // source bytes 0..63
+ ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x5] // source bytes 64..127
+ sub w4, w4, #1 // w4 = rows still to copy (fifth C argument, height, per the prototype order)
+ add x2, x2, x3 // next source row (x3 = source stride)
+ add x0, x0, x1 // next dst row (x1 = dst stride)
+ st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], #64 // write bytes 0..63 unchanged
+ st1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x6] // write bytes 64..127 unchanged
+ cbnz w4, 1b // repeat until the row counter reaches zero
+ ret
+endfunc
+
function ff_hevc_put_hevc_qpel_uni_v4_8_neon, export=1
load_qpel_filterb x6, x5
sub x2, x2, x3, lsl #1
@@ -1528,6 +1563,10 @@ function ff_hevc_put_hevc_qpel_uni_v64_8_neon, export=1
b X(ff_hevc_put_hevc_qpel_uni_v16_8_neon)
endfunc
+function ff_vvc_put_pel_uni_w_pixels4_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_w_pixels4_8_neon) // VVC symbol is a thin alias: tail-branch to the HEVC routine, registers untouched
+endfunc
+
function ff_hevc_put_hevc_pel_uni_w_pixels4_8_neon, export=1
mov w10, #-6
sub w10, w10, w5
@@ -1598,6 +1637,10 @@ function ff_hevc_put_hevc_pel_uni_w_pixels6_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_uni_w_pixels8_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_w_pixels8_8_neon) // VVC symbol is a thin alias: tail-branch to the HEVC routine, registers untouched
+endfunc
+
function ff_hevc_put_hevc_pel_uni_w_pixels8_8_neon, export=1
mov w10, #-6
sub w10, w10, w5
@@ -1741,7 +1784,9 @@ function ff_hevc_put_hevc_pel_uni_w_pixels16_8_neon, export=1
ret
endfunc
-
+function ff_vvc_put_pel_uni_w_pixels16_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_w_pixels16_8_neon) // VVC symbol is a thin alias: branch back to the HEVC routine defined just above, registers untouched
+endfunc
function ff_hevc_put_hevc_pel_uni_w_pixels24_8_neon, export=1
mov w10, #-6
@@ -1803,6 +1848,9 @@ function ff_hevc_put_hevc_pel_uni_w_pixels32_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_uni_w_pixels32_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_w_pixels32_8_neon) // VVC symbol is a thin alias: branch back to the HEVC routine defined just above, registers untouched
+endfunc
function ff_hevc_put_hevc_pel_uni_w_pixels48_8_neon, export=1
mov w10, #-6
@@ -1839,6 +1887,39 @@ function ff_hevc_put_hevc_pel_uni_w_pixels64_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_uni_w_pixels64_8_neon, export=1
+ b X(ff_hevc_put_hevc_pel_uni_w_pixels64_8_neon) // VVC symbol is a thin alias: branch back to the HEVC routine defined just above, registers untouched
+endfunc
+
+function ff_vvc_put_pel_uni_w_pixels128_8_neon, export=1
+ mov w10, #-6 // per the C prototype order: w4 = height, w5 = denom, w6 = wx, w7 = ox
+ sub w10, w10, w5 // w10 = -6 - denom
+ dup v30.8h, w6 // broadcast wx to eight 16-bit lanes
+ dup v31.4s, w10 // broadcast -6 - denom to four 32-bit lanes
+ dup v29.4s, w7 // broadcast ox; v29-v31 are presumably the constants PEL_UNI_W_PIXEL_CALC reads (macro not visible here)
+1:
+ mov x11, x2 // x11 = read cursor inside the current source row
+ mov x12, x0 // x12 = write cursor inside the current dst row
+ ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x11], #64 // source bytes 0..63
+ add x2, x2, x3 // next source row
+ add x0, x0, x1 // next dst row
+ PEL_UNI_W_PIXEL_CALC v0, v4, v5, v16, v17, v18, v19
+ PEL_UNI_W_PIXEL_CALC v1, v6, v7, v20, v21, v22, v23
+ PEL_UNI_W_PIXEL_CALC v2, v4, v5, v16, v17, v18, v19
+ PEL_UNI_W_PIXEL_CALC v3, v6, v7, v20, v21, v22, v23
+ st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x12], #64 // store processed bytes 0..63
+
+ ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x11] // source bytes 64..127; no writeback needed, x11 is reloaded at 1:
+ sub w4, w4, #1 // w4 = rows still to process
+ PEL_UNI_W_PIXEL_CALC v0, v4, v5, v16, v17, v18, v19
+ PEL_UNI_W_PIXEL_CALC v1, v6, v7, v20, v21, v22, v23
+ PEL_UNI_W_PIXEL_CALC v2, v4, v5, v16, v17, v18, v19
+ PEL_UNI_W_PIXEL_CALC v3, v6, v7, v20, v21, v22, v23
+ st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x12] // store processed bytes 64..127; no writeback needed, x12 is reloaded at 1:
+ cbnz w4, 1b // repeat until the row counter reaches zero
+ ret
+endfunc
+
.macro QPEL_UNI_W_V_HEADER
ldur x12, [sp, #8] // my
sub x2, x2, x3, lsl #1
@@ -3,5 +3,6 @@ clean::
OBJS-$(CONFIG_VVC_DECODER) += aarch64/vvc/dsp_init.o
NEON-OBJS-$(CONFIG_VVC_DECODER) += aarch64/vvc/alf.o \
+ aarch64/h26x/epel_neon.o \
aarch64/h26x/qpel_neon.o \
aarch64/h26x/sao_neon.o
@@ -46,6 +46,13 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
return;
if (bd == 8) {
+ c->inter.put[0][1][0][0] = ff_vvc_put_pel_pixels4_8_neon;
+ c->inter.put[0][2][0][0] = ff_vvc_put_pel_pixels8_8_neon;
+ c->inter.put[0][3][0][0] = ff_vvc_put_pel_pixels16_8_neon;
+ c->inter.put[0][4][0][0] = ff_vvc_put_pel_pixels32_8_neon;
+ c->inter.put[0][5][0][0] = ff_vvc_put_pel_pixels64_8_neon;
+ c->inter.put[0][6][0][0] = ff_vvc_put_pel_pixels128_8_neon;
+
c->inter.put[0][1][0][1] = ff_vvc_put_qpel_h4_8_neon;
c->inter.put[0][2][0][1] = ff_vvc_put_qpel_h8_8_neon;
c->inter.put[0][3][0][1] = ff_vvc_put_qpel_h16_8_neon;
@@ -53,6 +60,13 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
c->inter.put[0][5][0][1] =
c->inter.put[0][6][0][1] = ff_vvc_put_qpel_h32_8_neon;
+ c->inter.put_uni[0][1][0][0] = ff_vvc_put_pel_uni_pixels4_8_neon;
+ c->inter.put_uni[0][2][0][0] = ff_vvc_put_pel_uni_pixels8_8_neon;
+ c->inter.put_uni[0][3][0][0] = ff_vvc_put_pel_uni_pixels16_8_neon;
+ c->inter.put_uni[0][4][0][0] = ff_vvc_put_pel_uni_pixels32_8_neon;
+ c->inter.put_uni[0][5][0][0] = ff_vvc_put_pel_uni_pixels64_8_neon;
+ c->inter.put_uni[0][6][0][0] = ff_vvc_put_pel_uni_pixels128_8_neon;
+
c->inter.put_uni[0][1][0][1] = ff_vvc_put_qpel_uni_h4_8_neon;
c->inter.put_uni[0][2][0][1] = ff_vvc_put_qpel_uni_h8_8_neon;
c->inter.put_uni[0][3][0][1] = ff_vvc_put_qpel_uni_h16_8_neon;
@@ -60,6 +74,13 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
c->inter.put_uni[0][5][0][1] =
c->inter.put_uni[0][6][0][1] = ff_vvc_put_qpel_uni_h32_8_neon;
+ c->inter.put_uni_w[0][1][0][0] = ff_vvc_put_pel_uni_w_pixels4_8_neon;
+ c->inter.put_uni_w[0][2][0][0] = ff_vvc_put_pel_uni_w_pixels8_8_neon;
+ c->inter.put_uni_w[0][3][0][0] = ff_vvc_put_pel_uni_w_pixels16_8_neon;
+ c->inter.put_uni_w[0][4][0][0] = ff_vvc_put_pel_uni_w_pixels32_8_neon;
+ c->inter.put_uni_w[0][5][0][0] = ff_vvc_put_pel_uni_w_pixels64_8_neon;
+ c->inter.put_uni_w[0][6][0][0] = ff_vvc_put_pel_uni_w_pixels128_8_neon;
+
for (int i = 0; i < FF_ARRAY_ELEMS(c->sao.band_filter); i++)
c->sao.band_filter[i] = ff_h26x_sao_band_filter_8x8_8_neon;
c->sao.edge_filter[0] = ff_vvc_sao_edge_filter_8x8_8_neon;
From: Zhao Zhili <zhilizhao@tencent.com> put_luma_pixels_8_4x4_c: 0.2 ( 1.00x) put_luma_pixels_8_4x4_neon: 0.2 ( 1.00x) put_luma_pixels_8_8x8_c: 0.7 ( 1.00x) put_luma_pixels_8_8x8_neon: 0.2 ( 3.22x) put_luma_pixels_8_16x16_c: 2.2 ( 1.00x) put_luma_pixels_8_16x16_neon: 0.2 ( 9.89x) put_luma_pixels_8_32x32_c: 8.2 ( 1.00x) put_luma_pixels_8_32x32_neon: 1.2 ( 6.71x) put_luma_pixels_8_64x64_c: 33.7 ( 1.00x) put_luma_pixels_8_64x64_neon: 2.5 (13.63x) put_luma_pixels_8_128x128_c: 145.5 ( 1.00x) put_luma_pixels_8_128x128_neon: 10.2 (14.23x) put_uni_pixels_luma_8_4x4_c: 0.5 ( 1.00x) put_uni_pixels_luma_8_4x4_neon: 0.0 ( 0.00x) put_uni_pixels_luma_8_8x8_c: 0.5 ( 1.00x) put_uni_pixels_luma_8_8x8_neon: 0.2 ( 2.11x) put_uni_pixels_luma_8_16x16_c: 1.2 ( 1.00x) put_uni_pixels_luma_8_16x16_neon: 0.2 ( 5.44x) put_uni_pixels_luma_8_32x32_c: 3.0 ( 1.00x) put_uni_pixels_luma_8_32x32_neon: 0.5 ( 6.26x) put_uni_pixels_luma_8_64x64_c: 3.0 ( 1.00x) put_uni_pixels_luma_8_64x64_neon: 1.7 ( 1.72x) put_uni_pixels_luma_8_128x128_c: 6.5 ( 1.00x) put_uni_pixels_luma_8_128x128_neon: 6.5 ( 1.00x) --- libavcodec/aarch64/h26x/dsp.h | 22 ++++ libavcodec/aarch64/h26x/epel_neon.S | 193 +++++++++++++++++----------- libavcodec/aarch64/h26x/qpel_neon.S | 83 +++++++++++- libavcodec/aarch64/vvc/Makefile | 1 + libavcodec/aarch64/vvc/dsp_init.c | 21 +++ 5 files changed, 245 insertions(+), 75 deletions(-)