[FFmpeg-devel] lavc/aarch64/simple_idct: separate macro arguments with commas

Submitted by Matthieu Bouron on May 9, 2017, 9:08 p.m.

Details

Message ID CAOmVQXHESPuyOiaWG243DfmVTEuqGjp4f59qT_1wQMsdtBoxtA@mail.gmail.com
State New
Headers show

Commit Message

Matthieu Bouron May 9, 2017, 9:08 p.m.
On Sun, May 7, 2017 at 11:05 AM, Matthieu Bouron <matthieu.bouron@gmail.com>
wrote:

>
>
> Le 2 mai 2017 12:01 PM, "Benoit Fouet" <benoit.fouet@free.fr> a écrit :
>
> Hi,
>
>
> On 28/04/2017 21:58, Matthieu Bouron wrote:
> > Untested: fixes ticket #6324.
> > ---
> >  libavcodec/aarch64/simple_idct_neon.S | 12 ++++++------
> >  1 file changed, 6 insertions(+), 6 deletions(-)
> >
> > diff --git a/libavcodec/aarch64/simple_idct_neon.S
> b/libavcodec/aarch64/simple_idct_neon.S
> > index 52273420f9..d31f72a609 100644
> > --- a/libavcodec/aarch64/simple_idct_neon.S
> > +++ b/libavcodec/aarch64/simple_idct_neon.S
> > @@ -61,19 +61,19 @@ endconst
> >          br              x10
> >  .endm
> >
> > -.macro smull1 a b c
> > +.macro smull1 a, b, c
> >          smull           \a, \b, \c
> >  .endm
> >
> > -.macro smlal1 a b c
> > +.macro smlal1 a, b, c
> >          smlal           \a, \b, \c
> >  .endm
> >
> > -.macro smlsl1 a b c
> > +.macro smlsl1 a, b, c
> >          smlsl           \a, \b, \c
> >  .endm
> >
> > -.macro idct_col4_top y1 y2 y3 y4 i l
> > +.macro idct_col4_top y1, y2, y3, y4, i, l
> >          smull\i         v7.4S,  \y3\().\l, z2
> >          smull\i         v16.4S, \y3\().\l, z6
> >          smull\i         v17.4S, \y2\().\l, z1
> > @@ -91,7 +91,7 @@ endconst
> >          smlsl\i         v6.4S,  \y4\().\l, z5
> >  .endm
> >
> > -.macro idct_row4_neon y1 y2 y3 y4 pass
> > +.macro idct_row4_neon y1, y2, y3, y4, pass
> >          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
> >          movi            v23.4S, #1<<2, lsl #8
> >          orr             v5.16B, \y1\().16B, \y2\().16B
> > @@ -153,7 +153,7 @@ endconst
> >          trn2            \y4\().4S, v17.4S, v19.4S
> >  .endm
> >
> > -.macro declare_idct_col4_neon i l
> > +.macro declare_idct_col4_neon i, l
> >  function idct_col4_neon\i
> >          dup             v23.4H, z4c
> >  .if \i == 1
>
> Sounds sane, but shouldn't we be doing this for all instances of
> multiple arguments macros without commas?
>
>
> Sure, I may have missed some. I will work again on this patch on Tuesday
> as I will have access to an apple machine (and hopefully fix the build
> without gas-preprocessor).
>
> Sorry for the delay,
> Matthieu
>
>
Updated patch attached:
  * add missing commas to separate macro arguments
  * passes .4H/.8H as macro arguments instead of 4H/8H (the latter form
being interpreted as a hexadecimal value, i.e. 4/8).

Comments

Matthieu Bouron May 10, 2017, 6:23 p.m.
On Tue, May 09, 2017 at 11:08:48PM +0200, Matthieu Bouron wrote:
> On Sun, May 7, 2017 at 11:05 AM, Matthieu Bouron <matthieu.bouron@gmail.com>
> wrote:
> 
> >
> >
> > Le 2 mai 2017 12:01 PM, "Benoit Fouet" <benoit.fouet@free.fr> a écrit :
> >
> > Hi,
> >
> >
> > On 28/04/2017 21:58, Matthieu Bouron wrote:
> > > Untested: fixes ticket #6324.
> > > ---
> > >  libavcodec/aarch64/simple_idct_neon.S | 12 ++++++------
> > >  1 file changed, 6 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/libavcodec/aarch64/simple_idct_neon.S
> > b/libavcodec/aarch64/simple_idct_neon.S
> > > index 52273420f9..d31f72a609 100644
> > > --- a/libavcodec/aarch64/simple_idct_neon.S
> > > +++ b/libavcodec/aarch64/simple_idct_neon.S
> > > @@ -61,19 +61,19 @@ endconst
> > >          br              x10
> > >  .endm
> > >
> > > -.macro smull1 a b c
> > > +.macro smull1 a, b, c
> > >          smull           \a, \b, \c
> > >  .endm
> > >
> > > -.macro smlal1 a b c
> > > +.macro smlal1 a, b, c
> > >          smlal           \a, \b, \c
> > >  .endm
> > >
> > > -.macro smlsl1 a b c
> > > +.macro smlsl1 a, b, c
> > >          smlsl           \a, \b, \c
> > >  .endm
> > >
> > > -.macro idct_col4_top y1 y2 y3 y4 i l
> > > +.macro idct_col4_top y1, y2, y3, y4, i, l
> > >          smull\i         v7.4S,  \y3\().\l, z2
> > >          smull\i         v16.4S, \y3\().\l, z6
> > >          smull\i         v17.4S, \y2\().\l, z1
> > > @@ -91,7 +91,7 @@ endconst
> > >          smlsl\i         v6.4S,  \y4\().\l, z5
> > >  .endm
> > >
> > > -.macro idct_row4_neon y1 y2 y3 y4 pass
> > > +.macro idct_row4_neon y1, y2, y3, y4, pass
> > >          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
> > >          movi            v23.4S, #1<<2, lsl #8
> > >          orr             v5.16B, \y1\().16B, \y2\().16B
> > > @@ -153,7 +153,7 @@ endconst
> > >          trn2            \y4\().4S, v17.4S, v19.4S
> > >  .endm
> > >
> > > -.macro declare_idct_col4_neon i l
> > > +.macro declare_idct_col4_neon i, l
> > >  function idct_col4_neon\i
> > >          dup             v23.4H, z4c
> > >  .if \i == 1
> >
> > Sounds sane, but shouldn't we be doing this for all instances of
> > multiple arguments macros without commas?
> >
> >
> > Sure, I may have missed some. I will work again on this patch on Tuesday
> > as I will have access to an apple machine (and hopefully fix the build
> > without gas-preprocessor).
> >
> > Sorry for the delay,
> > Matthieu
> >
> >
> Updated patch attached:
>   * add missing commas to separate macro arguments
>   * passes .4H/.8H as macro arguments instead of 4H/8H (the latter form
> being interpreted as a hexadecimal value, i.e. 4/8).

> From e27ac0f3a8b6436a7530ee5c5c514bfdfac4a558 Mon Sep 17 00:00:00 2001
> From: Matthieu Bouron <matthieu.bouron@gmail.com>
> Date: Fri, 28 Apr 2017 21:58:55 +0200
> Subject: [PATCH] lavc/aarch64/simple_idct: fix iOS build without
>  gas-preprocessor
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Separates macro arguments with commas and passes .4H/.8H as macro
> arguments instead of 4H/8H (the latter form being interpreted as a
> hexadecimal value).
> 
> Fixes ticket #6324.
> 
> Suggested-by: Martin Storsjö <martin@martin.st>
> ---
>  libavcodec/aarch64/simple_idct_neon.S | 74 +++++++++++++++++------------------
>  1 file changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/libavcodec/aarch64/simple_idct_neon.S b/libavcodec/aarch64/simple_idct_neon.S
> index 52273420f9..92987985d2 100644
> --- a/libavcodec/aarch64/simple_idct_neon.S
> +++ b/libavcodec/aarch64/simple_idct_neon.S
> @@ -61,37 +61,37 @@ endconst
>          br              x10
>  .endm
>  
> -.macro smull1 a b c
> +.macro smull1 a, b, c
>          smull           \a, \b, \c
>  .endm
>  
> -.macro smlal1 a b c
> +.macro smlal1 a, b, c
>          smlal           \a, \b, \c
>  .endm
>  
> -.macro smlsl1 a b c
> +.macro smlsl1 a, b, c
>          smlsl           \a, \b, \c
>  .endm
>  
> -.macro idct_col4_top y1 y2 y3 y4 i l
> -        smull\i         v7.4S,  \y3\().\l, z2
> -        smull\i         v16.4S, \y3\().\l, z6
> -        smull\i         v17.4S, \y2\().\l, z1
> +.macro idct_col4_top y1, y2, y3, y4, i, l
> +        smull\i         v7.4S,  \y3\l, z2
> +        smull\i         v16.4S, \y3\l, z6
> +        smull\i         v17.4S, \y2\l, z1
>          add             v19.4S, v23.4S, v7.4S
> -        smull\i         v18.4S, \y2\().\l, z3
> +        smull\i         v18.4S, \y2\l, z3
>          add             v20.4S, v23.4S, v16.4S
> -        smull\i         v5.4S,  \y2\().\l, z5
> +        smull\i         v5.4S,  \y2\l, z5
>          sub             v21.4S, v23.4S, v16.4S
> -        smull\i         v6.4S,  \y2\().\l, z7
> +        smull\i         v6.4S,  \y2\l, z7
>          sub             v22.4S, v23.4S, v7.4S
>  
> -        smlal\i         v17.4S, \y4\().\l, z3
> -        smlsl\i         v18.4S, \y4\().\l, z7
> -        smlsl\i         v5.4S,  \y4\().\l, z1
> -        smlsl\i         v6.4S,  \y4\().\l, z5
> +        smlal\i         v17.4S, \y4\l, z3
> +        smlsl\i         v18.4S, \y4\l, z7
> +        smlsl\i         v5.4S,  \y4\l, z1
> +        smlsl\i         v6.4S,  \y4\l, z5
>  .endm
>  
> -.macro idct_row4_neon y1 y2 y3 y4 pass
> +.macro idct_row4_neon y1, y2, y3, y4, pass
>          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
>          movi            v23.4S, #1<<2, lsl #8
>          orr             v5.16B, \y1\().16B, \y2\().16B
> @@ -101,7 +101,7 @@ endconst
>          mov             x3, v5.D[1]
>          smlal           v23.4S, \y1\().4H, z4
>  
> -        idct_col4_top   \y1 \y2 \y3 \y4 1 4H
> +        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
>  
>          cmp             x3, #0
>          beq             \pass\()f
> @@ -153,7 +153,7 @@ endconst
>          trn2            \y4\().4S, v17.4S, v19.4S
>  .endm
>  
> -.macro declare_idct_col4_neon i l
> +.macro declare_idct_col4_neon i, l
>  function idct_col4_neon\i
>          dup             v23.4H, z4c
>  .if \i == 1
> @@ -164,14 +164,14 @@ function idct_col4_neon\i
>  .endif
>          smull           v23.4S, v23.4H, z4
>  
> -        idct_col4_top   v24 v25 v26 v27 \i \l
> +        idct_col4_top   v24, v25, v26, v27, \i, \l
>  
>          mov             x4, v28.D[\i - 1]
>          mov             x5, v29.D[\i - 1]
>          cmp             x4, #0
>          beq             1f
>  
> -        smull\i         v7.4S,  v28.\l, z4
> +        smull\i         v7.4S,  v28\l,  z4
>          add             v19.4S, v19.4S, v7.4S
>          sub             v20.4S, v20.4S, v7.4S
>          sub             v21.4S, v21.4S, v7.4S
> @@ -181,17 +181,17 @@ function idct_col4_neon\i
>          cmp             x5, #0
>          beq             2f
>  
> -        smlal\i         v17.4S, v29.\l, z5
> -        smlsl\i         v18.4S, v29.\l, z1
> -        smlal\i         v5.4S,  v29.\l, z7
> -        smlal\i         v6.4S,  v29.\l, z3
> +        smlal\i         v17.4S, v29\l, z5
> +        smlsl\i         v18.4S, v29\l, z1
> +        smlal\i         v5.4S,  v29\l, z7
> +        smlal\i         v6.4S,  v29\l, z3
>  
>  2:      mov             x5, v31.D[\i - 1]
>          cmp             x4, #0
>          beq             3f
>  
> -        smull\i         v7.4S,  v30.\l, z6
> -        smull\i         v16.4S, v30.\l, z2
> +        smull\i         v7.4S,  v30\l, z6
> +        smull\i         v16.4S, v30\l, z2
>          add             v19.4S, v19.4S, v7.4S
>          sub             v22.4S, v22.4S, v7.4S
>          sub             v20.4S, v20.4S, v16.4S
> @@ -200,10 +200,10 @@ function idct_col4_neon\i
>  3:      cmp             x5, #0
>          beq             4f
>  
> -        smlal\i         v17.4S, v31.\l, z7
> -        smlsl\i         v18.4S, v31.\l, z5
> -        smlal\i         v5.4S,  v31.\l, z3
> -        smlsl\i         v6.4S,  v31.\l, z1
> +        smlal\i         v17.4S, v31\l, z7
> +        smlsl\i         v18.4S, v31\l, z5
> +        smlal\i         v5.4S,  v31\l, z3
> +        smlsl\i         v6.4S,  v31\l, z1
>  
>  4:      addhn           v7.4H, v19.4S, v17.4S
>          addhn2          v7.8H, v20.4S, v18.4S
> @@ -219,14 +219,14 @@ function idct_col4_neon\i
>  endfunc
>  .endm
>  
> -declare_idct_col4_neon 1 4H
> -declare_idct_col4_neon 2 8H
> +declare_idct_col4_neon 1, .4H
> +declare_idct_col4_neon 2, .8H
>  
>  function ff_simple_idct_put_neon, export=1
>          idct_start      x2
>  
> -        idct_row4_neon  v24 v25 v26 v27 1
> -        idct_row4_neon  v28 v29 v30 v31 2
> +        idct_row4_neon  v24, v25, v26, v27, 1
> +        idct_row4_neon  v28, v29, v30, v31, 2
>          bl              idct_col4_neon1
>  
>          sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
> @@ -263,8 +263,8 @@ endfunc
>  function ff_simple_idct_add_neon, export=1
>          idct_start      x2
>  
> -        idct_row4_neon  v24 v25 v26 v27 1
> -        idct_row4_neon  v28 v29 v30 v31 2
> +        idct_row4_neon  v24, v25, v26, v27, 1
> +        idct_row4_neon  v28, v29, v30, v31, 2
>          bl              idct_col4_neon1
>  
>          sshr            v1.8H, V7.8H, #COL_SHIFT-16
> @@ -328,8 +328,8 @@ function ff_simple_idct_neon, export=1
>          idct_start      x0
>  
>          mov             x2,  x0
> -        idct_row4_neon  v24 v25 v26 v27 1
> -        idct_row4_neon  v28 v29 v30 v31 2
> +        idct_row4_neon  v24, v25, v26, v27, 1
> +        idct_row4_neon  v28, v29, v30, v31, 2
>          add             x2, x2, #-128
>          bl              idct_col4_neon1
>  
> -- 
> 2.12.0
> 

If there is no objection, I will push the patch tomorrow.

Matthieu
Matthieu Bouron May 11, 2017, 12:31 p.m.
On Wed, May 10, 2017 at 08:23:02PM +0200, Matthieu Bouron wrote:
> On Tue, May 09, 2017 at 11:08:48PM +0200, Matthieu Bouron wrote:
> > On Sun, May 7, 2017 at 11:05 AM, Matthieu Bouron <matthieu.bouron@gmail.com>
> > wrote:
> > 
> > >
> > >
> > > Le 2 mai 2017 12:01 PM, "Benoit Fouet" <benoit.fouet@free.fr> a écrit :
> > >
> > > Hi,
> > >
> > >
> > > On 28/04/2017 21:58, Matthieu Bouron wrote:
> > > > Untested: fixes ticket #6324.
> > > > ---
> > > >  libavcodec/aarch64/simple_idct_neon.S | 12 ++++++------
> > > >  1 file changed, 6 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/libavcodec/aarch64/simple_idct_neon.S
> > > b/libavcodec/aarch64/simple_idct_neon.S
> > > > index 52273420f9..d31f72a609 100644
> > > > --- a/libavcodec/aarch64/simple_idct_neon.S
> > > > +++ b/libavcodec/aarch64/simple_idct_neon.S
> > > > @@ -61,19 +61,19 @@ endconst
> > > >          br              x10
> > > >  .endm
> > > >
> > > > -.macro smull1 a b c
> > > > +.macro smull1 a, b, c
> > > >          smull           \a, \b, \c
> > > >  .endm
> > > >
> > > > -.macro smlal1 a b c
> > > > +.macro smlal1 a, b, c
> > > >          smlal           \a, \b, \c
> > > >  .endm
> > > >
> > > > -.macro smlsl1 a b c
> > > > +.macro smlsl1 a, b, c
> > > >          smlsl           \a, \b, \c
> > > >  .endm
> > > >
> > > > -.macro idct_col4_top y1 y2 y3 y4 i l
> > > > +.macro idct_col4_top y1, y2, y3, y4, i, l
> > > >          smull\i         v7.4S,  \y3\().\l, z2
> > > >          smull\i         v16.4S, \y3\().\l, z6
> > > >          smull\i         v17.4S, \y2\().\l, z1
> > > > @@ -91,7 +91,7 @@ endconst
> > > >          smlsl\i         v6.4S,  \y4\().\l, z5
> > > >  .endm
> > > >
> > > > -.macro idct_row4_neon y1 y2 y3 y4 pass
> > > > +.macro idct_row4_neon y1, y2, y3, y4, pass
> > > >          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
> > > >          movi            v23.4S, #1<<2, lsl #8
> > > >          orr             v5.16B, \y1\().16B, \y2\().16B
> > > > @@ -153,7 +153,7 @@ endconst
> > > >          trn2            \y4\().4S, v17.4S, v19.4S
> > > >  .endm
> > > >
> > > > -.macro declare_idct_col4_neon i l
> > > > +.macro declare_idct_col4_neon i, l
> > > >  function idct_col4_neon\i
> > > >          dup             v23.4H, z4c
> > > >  .if \i == 1
> > >
> > > Sounds sane, but shouldn't we be doing this for all instances of
> > > multiple arguments macros without commas?
> > >
> > >
> > > Sure, I may have missed some. I will work again on this patch on Tuesday
> > > as I will have access to an apple machine (and hopefully fix the build
> > > without gas-preprocessor).
> > >
> > > Sorry for the delay,
> > > Matthieu
> > >
> > >
> > Updated patch attached:
> >   * add missing commas to separate macro arguments
> >   * passes .4H/.8H as macro arguments instead of 4H/8H (the latter form
> > being interpreted as a hexadecimal value, i.e. 4/8).
> 
> > From e27ac0f3a8b6436a7530ee5c5c514bfdfac4a558 Mon Sep 17 00:00:00 2001
> > From: Matthieu Bouron <matthieu.bouron@gmail.com>
> > Date: Fri, 28 Apr 2017 21:58:55 +0200
> > Subject: [PATCH] lavc/aarch64/simple_idct: fix iOS build without
> >  gas-preprocessor
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Separates macro arguments with commas and passes .4H/.8H as macro
> > arguments instead of 4H/8H (the latter form being interpreted as a
> > hexadecimal value).
> > 
> > Fixes ticket #6324.
> > 
> > Suggested-by: Martin Storsjö <martin@martin.st>
> > ---
> >  libavcodec/aarch64/simple_idct_neon.S | 74 +++++++++++++++++------------------
> >  1 file changed, 37 insertions(+), 37 deletions(-)
> > 
> > diff --git a/libavcodec/aarch64/simple_idct_neon.S b/libavcodec/aarch64/simple_idct_neon.S
> > index 52273420f9..92987985d2 100644
> > --- a/libavcodec/aarch64/simple_idct_neon.S
> > +++ b/libavcodec/aarch64/simple_idct_neon.S
> > @@ -61,37 +61,37 @@ endconst
> >          br              x10
> >  .endm
> >  
> > -.macro smull1 a b c
> > +.macro smull1 a, b, c
> >          smull           \a, \b, \c
> >  .endm
> >  
> > -.macro smlal1 a b c
> > +.macro smlal1 a, b, c
> >          smlal           \a, \b, \c
> >  .endm
> >  
> > -.macro smlsl1 a b c
> > +.macro smlsl1 a, b, c
> >          smlsl           \a, \b, \c
> >  .endm
> >  
> > -.macro idct_col4_top y1 y2 y3 y4 i l
> > -        smull\i         v7.4S,  \y3\().\l, z2
> > -        smull\i         v16.4S, \y3\().\l, z6
> > -        smull\i         v17.4S, \y2\().\l, z1
> > +.macro idct_col4_top y1, y2, y3, y4, i, l
> > +        smull\i         v7.4S,  \y3\l, z2
> > +        smull\i         v16.4S, \y3\l, z6
> > +        smull\i         v17.4S, \y2\l, z1
> >          add             v19.4S, v23.4S, v7.4S
> > -        smull\i         v18.4S, \y2\().\l, z3
> > +        smull\i         v18.4S, \y2\l, z3
> >          add             v20.4S, v23.4S, v16.4S
> > -        smull\i         v5.4S,  \y2\().\l, z5
> > +        smull\i         v5.4S,  \y2\l, z5
> >          sub             v21.4S, v23.4S, v16.4S
> > -        smull\i         v6.4S,  \y2\().\l, z7
> > +        smull\i         v6.4S,  \y2\l, z7
> >          sub             v22.4S, v23.4S, v7.4S
> >  
> > -        smlal\i         v17.4S, \y4\().\l, z3
> > -        smlsl\i         v18.4S, \y4\().\l, z7
> > -        smlsl\i         v5.4S,  \y4\().\l, z1
> > -        smlsl\i         v6.4S,  \y4\().\l, z5
> > +        smlal\i         v17.4S, \y4\l, z3
> > +        smlsl\i         v18.4S, \y4\l, z7
> > +        smlsl\i         v5.4S,  \y4\l, z1
> > +        smlsl\i         v6.4S,  \y4\l, z5
> >  .endm
> >  
> > -.macro idct_row4_neon y1 y2 y3 y4 pass
> > +.macro idct_row4_neon y1, y2, y3, y4, pass
> >          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
> >          movi            v23.4S, #1<<2, lsl #8
> >          orr             v5.16B, \y1\().16B, \y2\().16B
> > @@ -101,7 +101,7 @@ endconst
> >          mov             x3, v5.D[1]
> >          smlal           v23.4S, \y1\().4H, z4
> >  
> > -        idct_col4_top   \y1 \y2 \y3 \y4 1 4H
> > +        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
> >  
> >          cmp             x3, #0
> >          beq             \pass\()f
> > @@ -153,7 +153,7 @@ endconst
> >          trn2            \y4\().4S, v17.4S, v19.4S
> >  .endm
> >  
> > -.macro declare_idct_col4_neon i l
> > +.macro declare_idct_col4_neon i, l
> >  function idct_col4_neon\i
> >          dup             v23.4H, z4c
> >  .if \i == 1
> > @@ -164,14 +164,14 @@ function idct_col4_neon\i
> >  .endif
> >          smull           v23.4S, v23.4H, z4
> >  
> > -        idct_col4_top   v24 v25 v26 v27 \i \l
> > +        idct_col4_top   v24, v25, v26, v27, \i, \l
> >  
> >          mov             x4, v28.D[\i - 1]
> >          mov             x5, v29.D[\i - 1]
> >          cmp             x4, #0
> >          beq             1f
> >  
> > -        smull\i         v7.4S,  v28.\l, z4
> > +        smull\i         v7.4S,  v28\l,  z4
> >          add             v19.4S, v19.4S, v7.4S
> >          sub             v20.4S, v20.4S, v7.4S
> >          sub             v21.4S, v21.4S, v7.4S
> > @@ -181,17 +181,17 @@ function idct_col4_neon\i
> >          cmp             x5, #0
> >          beq             2f
> >  
> > -        smlal\i         v17.4S, v29.\l, z5
> > -        smlsl\i         v18.4S, v29.\l, z1
> > -        smlal\i         v5.4S,  v29.\l, z7
> > -        smlal\i         v6.4S,  v29.\l, z3
> > +        smlal\i         v17.4S, v29\l, z5
> > +        smlsl\i         v18.4S, v29\l, z1
> > +        smlal\i         v5.4S,  v29\l, z7
> > +        smlal\i         v6.4S,  v29\l, z3
> >  
> >  2:      mov             x5, v31.D[\i - 1]
> >          cmp             x4, #0
> >          beq             3f
> >  
> > -        smull\i         v7.4S,  v30.\l, z6
> > -        smull\i         v16.4S, v30.\l, z2
> > +        smull\i         v7.4S,  v30\l, z6
> > +        smull\i         v16.4S, v30\l, z2
> >          add             v19.4S, v19.4S, v7.4S
> >          sub             v22.4S, v22.4S, v7.4S
> >          sub             v20.4S, v20.4S, v16.4S
> > @@ -200,10 +200,10 @@ function idct_col4_neon\i
> >  3:      cmp             x5, #0
> >          beq             4f
> >  
> > -        smlal\i         v17.4S, v31.\l, z7
> > -        smlsl\i         v18.4S, v31.\l, z5
> > -        smlal\i         v5.4S,  v31.\l, z3
> > -        smlsl\i         v6.4S,  v31.\l, z1
> > +        smlal\i         v17.4S, v31\l, z7
> > +        smlsl\i         v18.4S, v31\l, z5
> > +        smlal\i         v5.4S,  v31\l, z3
> > +        smlsl\i         v6.4S,  v31\l, z1
> >  
> >  4:      addhn           v7.4H, v19.4S, v17.4S
> >          addhn2          v7.8H, v20.4S, v18.4S
> > @@ -219,14 +219,14 @@ function idct_col4_neon\i
> >  endfunc
> >  .endm
> >  
> > -declare_idct_col4_neon 1 4H
> > -declare_idct_col4_neon 2 8H
> > +declare_idct_col4_neon 1, .4H
> > +declare_idct_col4_neon 2, .8H
> >  
> >  function ff_simple_idct_put_neon, export=1
> >          idct_start      x2
> >  
> > -        idct_row4_neon  v24 v25 v26 v27 1
> > -        idct_row4_neon  v28 v29 v30 v31 2
> > +        idct_row4_neon  v24, v25, v26, v27, 1
> > +        idct_row4_neon  v28, v29, v30, v31, 2
> >          bl              idct_col4_neon1
> >  
> >          sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
> > @@ -263,8 +263,8 @@ endfunc
> >  function ff_simple_idct_add_neon, export=1
> >          idct_start      x2
> >  
> > -        idct_row4_neon  v24 v25 v26 v27 1
> > -        idct_row4_neon  v28 v29 v30 v31 2
> > +        idct_row4_neon  v24, v25, v26, v27, 1
> > +        idct_row4_neon  v28, v29, v30, v31, 2
> >          bl              idct_col4_neon1
> >  
> >          sshr            v1.8H, V7.8H, #COL_SHIFT-16
> > @@ -328,8 +328,8 @@ function ff_simple_idct_neon, export=1
> >          idct_start      x0
> >  
> >          mov             x2,  x0
> > -        idct_row4_neon  v24 v25 v26 v27 1
> > -        idct_row4_neon  v28 v29 v30 v31 2
> > +        idct_row4_neon  v24, v25, v26, v27, 1
> > +        idct_row4_neon  v28, v29, v30, v31, 2
> >          add             x2, x2, #-128
> >          bl              idct_col4_neon1
> >  
> > -- 
> > 2.12.0
> > 
> 
> If there is no objection, I will push the patch tomorrow.

Patch applied.

Patch hide | download patch | download mbox

From e27ac0f3a8b6436a7530ee5c5c514bfdfac4a558 Mon Sep 17 00:00:00 2001
From: Matthieu Bouron <matthieu.bouron@gmail.com>
Date: Fri, 28 Apr 2017 21:58:55 +0200
Subject: [PATCH] lavc/aarch64/simple_idct: fix iOS build without
 gas-preprocessor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Separates macro arguments with commas and passes .4H/.8H as macro
arguments instead of 4H/8H (the latter form being interpreted as a
hexadecimal value).

Fixes ticket #6324.

Suggested-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/aarch64/simple_idct_neon.S | 74 +++++++++++++++++------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/libavcodec/aarch64/simple_idct_neon.S b/libavcodec/aarch64/simple_idct_neon.S
index 52273420f9..92987985d2 100644
--- a/libavcodec/aarch64/simple_idct_neon.S
+++ b/libavcodec/aarch64/simple_idct_neon.S
@@ -61,37 +61,37 @@  endconst
         br              x10
 .endm
 
-.macro smull1 a b c
+.macro smull1 a, b, c
         smull           \a, \b, \c
 .endm
 
-.macro smlal1 a b c
+.macro smlal1 a, b, c
         smlal           \a, \b, \c
 .endm
 
-.macro smlsl1 a b c
+.macro smlsl1 a, b, c
         smlsl           \a, \b, \c
 .endm
 
-.macro idct_col4_top y1 y2 y3 y4 i l
-        smull\i         v7.4S,  \y3\().\l, z2
-        smull\i         v16.4S, \y3\().\l, z6
-        smull\i         v17.4S, \y2\().\l, z1
+.macro idct_col4_top y1, y2, y3, y4, i, l
+        smull\i         v7.4S,  \y3\l, z2
+        smull\i         v16.4S, \y3\l, z6
+        smull\i         v17.4S, \y2\l, z1
         add             v19.4S, v23.4S, v7.4S
-        smull\i         v18.4S, \y2\().\l, z3
+        smull\i         v18.4S, \y2\l, z3
         add             v20.4S, v23.4S, v16.4S
-        smull\i         v5.4S,  \y2\().\l, z5
+        smull\i         v5.4S,  \y2\l, z5
         sub             v21.4S, v23.4S, v16.4S
-        smull\i         v6.4S,  \y2\().\l, z7
+        smull\i         v6.4S,  \y2\l, z7
         sub             v22.4S, v23.4S, v7.4S
 
-        smlal\i         v17.4S, \y4\().\l, z3
-        smlsl\i         v18.4S, \y4\().\l, z7
-        smlsl\i         v5.4S,  \y4\().\l, z1
-        smlsl\i         v6.4S,  \y4\().\l, z5
+        smlal\i         v17.4S, \y4\l, z3
+        smlsl\i         v18.4S, \y4\l, z7
+        smlsl\i         v5.4S,  \y4\l, z1
+        smlsl\i         v6.4S,  \y4\l, z5
 .endm
 
-.macro idct_row4_neon y1 y2 y3 y4 pass
+.macro idct_row4_neon y1, y2, y3, y4, pass
         ld1             {\y1\().2D-\y2\().2D}, [x2], #32
         movi            v23.4S, #1<<2, lsl #8
         orr             v5.16B, \y1\().16B, \y2\().16B
@@ -101,7 +101,7 @@  endconst
         mov             x3, v5.D[1]
         smlal           v23.4S, \y1\().4H, z4
 
-        idct_col4_top   \y1 \y2 \y3 \y4 1 4H
+        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
 
         cmp             x3, #0
         beq             \pass\()f
@@ -153,7 +153,7 @@  endconst
         trn2            \y4\().4S, v17.4S, v19.4S
 .endm
 
-.macro declare_idct_col4_neon i l
+.macro declare_idct_col4_neon i, l
 function idct_col4_neon\i
         dup             v23.4H, z4c
 .if \i == 1
@@ -164,14 +164,14 @@  function idct_col4_neon\i
 .endif
         smull           v23.4S, v23.4H, z4
 
-        idct_col4_top   v24 v25 v26 v27 \i \l
+        idct_col4_top   v24, v25, v26, v27, \i, \l
 
         mov             x4, v28.D[\i - 1]
         mov             x5, v29.D[\i - 1]
         cmp             x4, #0
         beq             1f
 
-        smull\i         v7.4S,  v28.\l, z4
+        smull\i         v7.4S,  v28\l,  z4
         add             v19.4S, v19.4S, v7.4S
         sub             v20.4S, v20.4S, v7.4S
         sub             v21.4S, v21.4S, v7.4S
@@ -181,17 +181,17 @@  function idct_col4_neon\i
         cmp             x5, #0
         beq             2f
 
-        smlal\i         v17.4S, v29.\l, z5
-        smlsl\i         v18.4S, v29.\l, z1
-        smlal\i         v5.4S,  v29.\l, z7
-        smlal\i         v6.4S,  v29.\l, z3
+        smlal\i         v17.4S, v29\l, z5
+        smlsl\i         v18.4S, v29\l, z1
+        smlal\i         v5.4S,  v29\l, z7
+        smlal\i         v6.4S,  v29\l, z3
 
 2:      mov             x5, v31.D[\i - 1]
         cmp             x4, #0
         beq             3f
 
-        smull\i         v7.4S,  v30.\l, z6
-        smull\i         v16.4S, v30.\l, z2
+        smull\i         v7.4S,  v30\l, z6
+        smull\i         v16.4S, v30\l, z2
         add             v19.4S, v19.4S, v7.4S
         sub             v22.4S, v22.4S, v7.4S
         sub             v20.4S, v20.4S, v16.4S
@@ -200,10 +200,10 @@  function idct_col4_neon\i
 3:      cmp             x5, #0
         beq             4f
 
-        smlal\i         v17.4S, v31.\l, z7
-        smlsl\i         v18.4S, v31.\l, z5
-        smlal\i         v5.4S,  v31.\l, z3
-        smlsl\i         v6.4S,  v31.\l, z1
+        smlal\i         v17.4S, v31\l, z7
+        smlsl\i         v18.4S, v31\l, z5
+        smlal\i         v5.4S,  v31\l, z3
+        smlsl\i         v6.4S,  v31\l, z1
 
 4:      addhn           v7.4H, v19.4S, v17.4S
         addhn2          v7.8H, v20.4S, v18.4S
@@ -219,14 +219,14 @@  function idct_col4_neon\i
 endfunc
 .endm
 
-declare_idct_col4_neon 1 4H
-declare_idct_col4_neon 2 8H
+declare_idct_col4_neon 1, .4H
+declare_idct_col4_neon 2, .8H
 
 function ff_simple_idct_put_neon, export=1
         idct_start      x2
 
-        idct_row4_neon  v24 v25 v26 v27 1
-        idct_row4_neon  v28 v29 v30 v31 2
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
         bl              idct_col4_neon1
 
         sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
@@ -263,8 +263,8 @@  endfunc
 function ff_simple_idct_add_neon, export=1
         idct_start      x2
 
-        idct_row4_neon  v24 v25 v26 v27 1
-        idct_row4_neon  v28 v29 v30 v31 2
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
         bl              idct_col4_neon1
 
         sshr            v1.8H, V7.8H, #COL_SHIFT-16
@@ -328,8 +328,8 @@  function ff_simple_idct_neon, export=1
         idct_start      x0
 
         mov             x2,  x0
-        idct_row4_neon  v24 v25 v26 v27 1
-        idct_row4_neon  v28 v29 v30 v31 2
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
         add             x2, x2, #-128
         bl              idct_col4_neon1
 
-- 
2.12.0