Message ID | 20230322000710.47513-1-jdek@itanimul.li |
---|---|
State | Accepted |
Commit | 37cde570bc2dcd64a15c5d9a37b9fa0d78d84f9f |
Headers | show |
Series | [FFmpeg-devel,1/3] lavc/aarch64: add clip N macro | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Wed, 22 Mar 2023, J. Dekker wrote:

> Signed-off-by: J. Dekker <jdek@itanimul.li>
> ---
> libavcodec/aarch64/hevcdsp_idct_neon.S | 19 +++++--------------
> libavcodec/aarch64/neon.S | 11 +++++++++++
> 2 files changed, 16 insertions(+), 14 deletions(-)
>
> diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
> index 467cb0f48a..3e59dd20bb 100644
> --- a/libavcodec/aarch64/hevcdsp_idct_neon.S
> +++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
> @@ -5,7 +5,7 @@
> *
> * Ported from arm/hevcdsp_idct_neon.S by
> * Copyright (c) 2020 Reimar Döffinger
> - * Copyright (c) 2020 J. Dekker
> + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
> *
> * This file is part of FFmpeg.
> *
> @@ -38,13 +38,6 @@ const trans, align=4
> .short 31, 22, 13, 4
> endconst
>
> -.macro clip2 in1, in2, min, max
> - smax \in1, \in1, \min
> - smax \in2, \in2, \min
> - smin \in1, \in1, \max
> - smin \in2, \in2, \max
> -.endm
> -
> function ff_hevc_add_residual_4x4_8_neon, export=1
> ld1 {v0.8h-v1.8h}, [x1]
> ld1 {v2.s}[0], [x0], x2
> @@ -182,7 +175,7 @@ function hevc_add_residual_4x4_16_neon, export=0
> ld1 {v3.d}[1], [x12], x2
> movi v4.8h, #0
> sqadd v1.8h, v1.8h, v3.8h
> - clip2 v0.8h, v1.8h, v4.8h, v21.8h
> + clip v4.8h, v21.8h, v0.8h, v1.8h
> st1 {v0.d}[0], [x0], x2
> st1 {v0.d}[1], [x0], x2
> st1 {v1.d}[0], [x0], x2
> @@ -201,7 +194,7 @@ function hevc_add_residual_8x8_16_neon, export=0
> sqadd v0.8h, v0.8h, v2.8h
> ld1 {v3.8h}, [x12]
> sqadd v1.8h, v1.8h, v3.8h
> - clip2 v0.8h, v1.8h, v4.8h, v21.8h
> + clip v4.8h, v21.8h, v0.8h, v1.8h
> st1 {v0.8h}, [x0], x2
> st1 {v1.8h}, [x12], x2
> bne 1b
> @@ -221,8 +214,7 @@ function hevc_add_residual_16x16_16_neon, export=0
> sqadd v1.8h, v1.8h, v17.8h
> sqadd v2.8h, v2.8h, v18.8h
> sqadd v3.8h, v3.8h, v19.8h
> - clip2 v0.8h, v1.8h, v20.8h, v21.8h
> - clip2 v2.8h, v3.8h, v20.8h, v21.8h
> + clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
> st1 {v0.8h-v1.8h}, [x0], x2
> st1 {v2.8h-v3.8h}, [x12], x2
> bne 1b
> @@ -239,8 +231,7 @@ function hevc_add_residual_32x32_16_neon, export=0
> sqadd v1.8h, v1.8h, v17.8h
> sqadd v2.8h, v2.8h, v18.8h
> sqadd v3.8h, v3.8h, v19.8h
> - clip2 v0.8h, v1.8h, v20.8h, v21.8h
> - clip2 v2.8h, v3.8h, v20.8h, v21.8h
> + clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
> st1 {v0.8h-v3.8h}, [x0], x2
> bne 1b
> ret
> diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
> index 1ad32c359d..bc105e4861 100644
> --- a/libavcodec/aarch64/neon.S
> +++ b/libavcodec/aarch64/neon.S
> @@ -1,6 +1,8 @@
> /*
> * This file is part of FFmpeg.
> *
> + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
> + *
> * FFmpeg is free software; you can redistribute it and/or
> * modify it under the terms of the GNU Lesser General Public
> * License as published by the Free Software Foundation; either
> @@ -16,6 +18,15 @@
> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> */
>
> +.macro clip min, max, regs:vararg
> +.irp x, \regs
> + smax \x, \x, \min
> +.endr
> +.irp x, \regs
> + smin \x, \x, \max
> +.endr
> +.endm
> +

LGTM, the vararg argument handling looks neat here.

// Martin
diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S index 467cb0f48a..3e59dd20bb 100644 --- a/libavcodec/aarch64/hevcdsp_idct_neon.S +++ b/libavcodec/aarch64/hevcdsp_idct_neon.S @@ -5,7 +5,7 @@ * * Ported from arm/hevcdsp_idct_neon.S by * Copyright (c) 2020 Reimar Döffinger - * Copyright (c) 2020 J. Dekker + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li> * * This file is part of FFmpeg. * @@ -38,13 +38,6 @@ const trans, align=4 .short 31, 22, 13, 4 endconst -.macro clip2 in1, in2, min, max - smax \in1, \in1, \min - smax \in2, \in2, \min - smin \in1, \in1, \max - smin \in2, \in2, \max -.endm - function ff_hevc_add_residual_4x4_8_neon, export=1 ld1 {v0.8h-v1.8h}, [x1] ld1 {v2.s}[0], [x0], x2 @@ -182,7 +175,7 @@ function hevc_add_residual_4x4_16_neon, export=0 ld1 {v3.d}[1], [x12], x2 movi v4.8h, #0 sqadd v1.8h, v1.8h, v3.8h - clip2 v0.8h, v1.8h, v4.8h, v21.8h + clip v4.8h, v21.8h, v0.8h, v1.8h st1 {v0.d}[0], [x0], x2 st1 {v0.d}[1], [x0], x2 st1 {v1.d}[0], [x0], x2 @@ -201,7 +194,7 @@ function hevc_add_residual_8x8_16_neon, export=0 sqadd v0.8h, v0.8h, v2.8h ld1 {v3.8h}, [x12] sqadd v1.8h, v1.8h, v3.8h - clip2 v0.8h, v1.8h, v4.8h, v21.8h + clip v4.8h, v21.8h, v0.8h, v1.8h st1 {v0.8h}, [x0], x2 st1 {v1.8h}, [x12], x2 bne 1b @@ -221,8 +214,7 @@ function hevc_add_residual_16x16_16_neon, export=0 sqadd v1.8h, v1.8h, v17.8h sqadd v2.8h, v2.8h, v18.8h sqadd v3.8h, v3.8h, v19.8h - clip2 v0.8h, v1.8h, v20.8h, v21.8h - clip2 v2.8h, v3.8h, v20.8h, v21.8h + clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h st1 {v0.8h-v1.8h}, [x0], x2 st1 {v2.8h-v3.8h}, [x12], x2 bne 1b @@ -239,8 +231,7 @@ function hevc_add_residual_32x32_16_neon, export=0 sqadd v1.8h, v1.8h, v17.8h sqadd v2.8h, v2.8h, v18.8h sqadd v3.8h, v3.8h, v19.8h - clip2 v0.8h, v1.8h, v20.8h, v21.8h - clip2 v2.8h, v3.8h, v20.8h, v21.8h + clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h st1 {v0.8h-v3.8h}, [x0], x2 bne 1b ret diff --git a/libavcodec/aarch64/neon.S 
b/libavcodec/aarch64/neon.S index 1ad32c359d..bc105e4861 100644 --- a/libavcodec/aarch64/neon.S +++ b/libavcodec/aarch64/neon.S @@ -1,6 +1,8 @@ /* * This file is part of FFmpeg. * + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li> + * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either @@ -16,6 +18,15 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +.macro clip min, max, regs:vararg +.irp x, \regs + smax \x, \x, \min +.endr +.irp x, \regs + smin \x, \x, \max +.endr +.endm + .macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 trn1 \r8\().8B, \r0\().8B, \r1\().8B trn2 \r9\().8B, \r0\().8B, \r1\().8B
Signed-off-by: J. Dekker <jdek@itanimul.li>
---
libavcodec/aarch64/hevcdsp_idct_neon.S | 19 +++++--------------
libavcodec/aarch64/neon.S | 11 +++++++++++
2 files changed, 16 insertions(+), 14 deletions(-)