[FFmpeg-devel] [PATCH] lavc/aarch64/simple_idct: separate macro arguments with commas

Matthieu Bouron matthieu.bouron at gmail.com
Wed May 10 21:23:02 EEST 2017


On Tue, May 09, 2017 at 11:08:48PM +0200, Matthieu Bouron wrote:
> On Sun, May 7, 2017 at 11:05 AM, Matthieu Bouron <matthieu.bouron at gmail.com>
> wrote:
> 
> >
> >
> > Le 2 mai 2017 12:01 PM, "Benoit Fouet" <benoit.fouet at free.fr> a écrit :
> >
> > Hi,
> >
> >
> > On 28/04/2017 21:58, Matthieu Bouron wrote:
> > > Untested: fixes ticket #6324.
> > > ---
> > >  libavcodec/aarch64/simple_idct_neon.S | 12 ++++++------
> > >  1 file changed, 6 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/libavcodec/aarch64/simple_idct_neon.S
> > b/libavcodec/aarch64/simple_idct_neon.S
> > > index 52273420f9..d31f72a609 100644
> > > --- a/libavcodec/aarch64/simple_idct_neon.S
> > > +++ b/libavcodec/aarch64/simple_idct_neon.S
> > > @@ -61,19 +61,19 @@ endconst
> > >          br              x10
> > >  .endm
> > >
> > > -.macro smull1 a b c
> > > +.macro smull1 a, b, c
> > >          smull           \a, \b, \c
> > >  .endm
> > >
> > > -.macro smlal1 a b c
> > > +.macro smlal1 a, b, c
> > >          smlal           \a, \b, \c
> > >  .endm
> > >
> > > -.macro smlsl1 a b c
> > > +.macro smlsl1 a, b, c
> > >          smlsl           \a, \b, \c
> > >  .endm
> > >
> > > -.macro idct_col4_top y1 y2 y3 y4 i l
> > > +.macro idct_col4_top y1, y2, y3, y4, i, l
> > >          smull\i         v7.4S,  \y3\().\l, z2
> > >          smull\i         v16.4S, \y3\().\l, z6
> > >          smull\i         v17.4S, \y2\().\l, z1
> > > @@ -91,7 +91,7 @@ endconst
> > >          smlsl\i         v6.4S,  \y4\().\l, z5
> > >  .endm
> > >
> > > -.macro idct_row4_neon y1 y2 y3 y4 pass
> > > +.macro idct_row4_neon y1, y2, y3, y4, pass
> > >          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
> > >          movi            v23.4S, #1<<2, lsl #8
> > >          orr             v5.16B, \y1\().16B, \y2\().16B
> > > @@ -153,7 +153,7 @@ endconst
> > >          trn2            \y4\().4S, v17.4S, v19.4S
> > >  .endm
> > >
> > > -.macro declare_idct_col4_neon i l
> > > +.macro declare_idct_col4_neon i, l
> > >  function idct_col4_neon\i
> > >          dup             v23.4H, z4c
> > >  .if \i == 1
> >
> > Sounds sane, but shouldn't we be doing this for all instances of
> > multiple arguments macros without commas?
> >
> >
> > Sure, I may have missed some. I will work again on this patch on Tuesday
> > as I will have access to an apple machine (and hopefully fix the build
> > without gas-preprocessor).
> >
> > Sorry for the delay,
> > Matthieu
> >
> >
> Updated patch attached:
>   * add missing commas to separate macro arguments
>   * passes .4H/.8H as macro arguments instead of 4H/8H (the latter form
> being interpreted as a hexadecimal value, i.e. 4/8).

> From e27ac0f3a8b6436a7530ee5c5c514bfdfac4a558 Mon Sep 17 00:00:00 2001
> From: Matthieu Bouron <matthieu.bouron at gmail.com>
> Date: Fri, 28 Apr 2017 21:58:55 +0200
> Subject: [PATCH] lavc/aarch64/simple_idct: fix iOS build without
>  gas-preprocessor
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Separates macro arguments with commas and passes .4H/.8H as macro
> arguments instead of 4H/8H (the latter form being interpreted as a
> hexadecimal value).
> 
> Fixes ticket #6324.
> 
> Suggested-by: Martin Storsjö <martin at martin.st>
> ---
>  libavcodec/aarch64/simple_idct_neon.S | 74 +++++++++++++++++------------------
>  1 file changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/libavcodec/aarch64/simple_idct_neon.S b/libavcodec/aarch64/simple_idct_neon.S
> index 52273420f9..92987985d2 100644
> --- a/libavcodec/aarch64/simple_idct_neon.S
> +++ b/libavcodec/aarch64/simple_idct_neon.S
> @@ -61,37 +61,37 @@ endconst
>          br              x10
>  .endm
>  
> -.macro smull1 a b c
> +.macro smull1 a, b, c
>          smull           \a, \b, \c
>  .endm
>  
> -.macro smlal1 a b c
> +.macro smlal1 a, b, c
>          smlal           \a, \b, \c
>  .endm
>  
> -.macro smlsl1 a b c
> +.macro smlsl1 a, b, c
>          smlsl           \a, \b, \c
>  .endm
>  
> -.macro idct_col4_top y1 y2 y3 y4 i l
> -        smull\i         v7.4S,  \y3\().\l, z2
> -        smull\i         v16.4S, \y3\().\l, z6
> -        smull\i         v17.4S, \y2\().\l, z1
> +.macro idct_col4_top y1, y2, y3, y4, i, l
> +        smull\i         v7.4S,  \y3\l, z2
> +        smull\i         v16.4S, \y3\l, z6
> +        smull\i         v17.4S, \y2\l, z1
>          add             v19.4S, v23.4S, v7.4S
> -        smull\i         v18.4S, \y2\().\l, z3
> +        smull\i         v18.4S, \y2\l, z3
>          add             v20.4S, v23.4S, v16.4S
> -        smull\i         v5.4S,  \y2\().\l, z5
> +        smull\i         v5.4S,  \y2\l, z5
>          sub             v21.4S, v23.4S, v16.4S
> -        smull\i         v6.4S,  \y2\().\l, z7
> +        smull\i         v6.4S,  \y2\l, z7
>          sub             v22.4S, v23.4S, v7.4S
>  
> -        smlal\i         v17.4S, \y4\().\l, z3
> -        smlsl\i         v18.4S, \y4\().\l, z7
> -        smlsl\i         v5.4S,  \y4\().\l, z1
> -        smlsl\i         v6.4S,  \y4\().\l, z5
> +        smlal\i         v17.4S, \y4\l, z3
> +        smlsl\i         v18.4S, \y4\l, z7
> +        smlsl\i         v5.4S,  \y4\l, z1
> +        smlsl\i         v6.4S,  \y4\l, z5
>  .endm
>  
> -.macro idct_row4_neon y1 y2 y3 y4 pass
> +.macro idct_row4_neon y1, y2, y3, y4, pass
>          ld1             {\y1\().2D-\y2\().2D}, [x2], #32
>          movi            v23.4S, #1<<2, lsl #8
>          orr             v5.16B, \y1\().16B, \y2\().16B
> @@ -101,7 +101,7 @@ endconst
>          mov             x3, v5.D[1]
>          smlal           v23.4S, \y1\().4H, z4
>  
> -        idct_col4_top   \y1 \y2 \y3 \y4 1 4H
> +        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
>  
>          cmp             x3, #0
>          beq             \pass\()f
> @@ -153,7 +153,7 @@ endconst
>          trn2            \y4\().4S, v17.4S, v19.4S
>  .endm
>  
> -.macro declare_idct_col4_neon i l
> +.macro declare_idct_col4_neon i, l
>  function idct_col4_neon\i
>          dup             v23.4H, z4c
>  .if \i == 1
> @@ -164,14 +164,14 @@ function idct_col4_neon\i
>  .endif
>          smull           v23.4S, v23.4H, z4
>  
> -        idct_col4_top   v24 v25 v26 v27 \i \l
> +        idct_col4_top   v24, v25, v26, v27, \i, \l
>  
>          mov             x4, v28.D[\i - 1]
>          mov             x5, v29.D[\i - 1]
>          cmp             x4, #0
>          beq             1f
>  
> -        smull\i         v7.4S,  v28.\l, z4
> +        smull\i         v7.4S,  v28\l,  z4
>          add             v19.4S, v19.4S, v7.4S
>          sub             v20.4S, v20.4S, v7.4S
>          sub             v21.4S, v21.4S, v7.4S
> @@ -181,17 +181,17 @@ function idct_col4_neon\i
>          cmp             x5, #0
>          beq             2f
>  
> -        smlal\i         v17.4S, v29.\l, z5
> -        smlsl\i         v18.4S, v29.\l, z1
> -        smlal\i         v5.4S,  v29.\l, z7
> -        smlal\i         v6.4S,  v29.\l, z3
> +        smlal\i         v17.4S, v29\l, z5
> +        smlsl\i         v18.4S, v29\l, z1
> +        smlal\i         v5.4S,  v29\l, z7
> +        smlal\i         v6.4S,  v29\l, z3
>  
>  2:      mov             x5, v31.D[\i - 1]
>          cmp             x4, #0
>          beq             3f
>  
> -        smull\i         v7.4S,  v30.\l, z6
> -        smull\i         v16.4S, v30.\l, z2
> +        smull\i         v7.4S,  v30\l, z6
> +        smull\i         v16.4S, v30\l, z2
>          add             v19.4S, v19.4S, v7.4S
>          sub             v22.4S, v22.4S, v7.4S
>          sub             v20.4S, v20.4S, v16.4S
> @@ -200,10 +200,10 @@ function idct_col4_neon\i
>  3:      cmp             x5, #0
>          beq             4f
>  
> -        smlal\i         v17.4S, v31.\l, z7
> -        smlsl\i         v18.4S, v31.\l, z5
> -        smlal\i         v5.4S,  v31.\l, z3
> -        smlsl\i         v6.4S,  v31.\l, z1
> +        smlal\i         v17.4S, v31\l, z7
> +        smlsl\i         v18.4S, v31\l, z5
> +        smlal\i         v5.4S,  v31\l, z3
> +        smlsl\i         v6.4S,  v31\l, z1
>  
>  4:      addhn           v7.4H, v19.4S, v17.4S
>          addhn2          v7.8H, v20.4S, v18.4S
> @@ -219,14 +219,14 @@ function idct_col4_neon\i
>  endfunc
>  .endm
>  
> -declare_idct_col4_neon 1 4H
> -declare_idct_col4_neon 2 8H
> +declare_idct_col4_neon 1, .4H
> +declare_idct_col4_neon 2, .8H
>  
>  function ff_simple_idct_put_neon, export=1
>          idct_start      x2
>  
> -        idct_row4_neon  v24 v25 v26 v27 1
> -        idct_row4_neon  v28 v29 v30 v31 2
> +        idct_row4_neon  v24, v25, v26, v27, 1
> +        idct_row4_neon  v28, v29, v30, v31, 2
>          bl              idct_col4_neon1
>  
>          sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
> @@ -263,8 +263,8 @@ endfunc
>  function ff_simple_idct_add_neon, export=1
>          idct_start      x2
>  
> -        idct_row4_neon  v24 v25 v26 v27 1
> -        idct_row4_neon  v28 v29 v30 v31 2
> +        idct_row4_neon  v24, v25, v26, v27, 1
> +        idct_row4_neon  v28, v29, v30, v31, 2
>          bl              idct_col4_neon1
>  
>          sshr            v1.8H, V7.8H, #COL_SHIFT-16
> @@ -328,8 +328,8 @@ function ff_simple_idct_neon, export=1
>          idct_start      x0
>  
>          mov             x2,  x0
> -        idct_row4_neon  v24 v25 v26 v27 1
> -        idct_row4_neon  v28 v29 v30 v31 2
> +        idct_row4_neon  v24, v25, v26, v27, 1
> +        idct_row4_neon  v28, v29, v30, v31, 2
>          add             x2, x2, #-128
>          bl              idct_col4_neon1
>  
> -- 
> 2.12.0
> 

If there is no objection, I will push the patch tomorrow.

Matthieu


More information about the ffmpeg-devel mailing list