diff mbox series

[FFmpeg-devel,06/21] swscale/output: add AYUV output support

Message ID 20241008225027.12209-6-jamrial@gmail.com
State New
Headers show
Series None | expand

Commit Message

James Almer Oct. 8, 2024, 10:50 p.m. UTC
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libswscale/output.c                      | 323 ++++++++++++-----------
 libswscale/utils.c                       |   2 +-
 tests/ref/fate/filter-pixdesc-ayuv       |   1 +
 tests/ref/fate/filter-pixfmts-copy       |   1 +
 tests/ref/fate/filter-pixfmts-crop       |   1 +
 tests/ref/fate/filter-pixfmts-field      |   1 +
 tests/ref/fate/filter-pixfmts-fieldorder |   1 +
 tests/ref/fate/filter-pixfmts-hflip      |   1 +
 tests/ref/fate/filter-pixfmts-il         |   1 +
 tests/ref/fate/filter-pixfmts-null       |   1 +
 tests/ref/fate/filter-pixfmts-pad        |   1 +
 tests/ref/fate/filter-pixfmts-scale      |   1 +
 tests/ref/fate/filter-pixfmts-transpose  |   1 +
 tests/ref/fate/filter-pixfmts-vflip      |   1 +
 14 files changed, 183 insertions(+), 154 deletions(-)
 create mode 100644 tests/ref/fate/filter-pixdesc-ayuv

Comments

Michael Niedermayer Oct. 11, 2024, 10:46 p.m. UTC | #1
On Tue, Oct 08, 2024 at 07:50:11PM -0300, James Almer wrote:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libswscale/output.c                      | 323 ++++++++++++-----------
>  libswscale/utils.c                       |   2 +-
>  tests/ref/fate/filter-pixdesc-ayuv       |   1 +
>  tests/ref/fate/filter-pixfmts-copy       |   1 +
>  tests/ref/fate/filter-pixfmts-crop       |   1 +
>  tests/ref/fate/filter-pixfmts-field      |   1 +
>  tests/ref/fate/filter-pixfmts-fieldorder |   1 +
>  tests/ref/fate/filter-pixfmts-hflip      |   1 +
>  tests/ref/fate/filter-pixfmts-il         |   1 +
>  tests/ref/fate/filter-pixfmts-null       |   1 +
>  tests/ref/fate/filter-pixfmts-pad        |   1 +
>  tests/ref/fate/filter-pixfmts-scale      |   1 +
>  tests/ref/fate/filter-pixfmts-transpose  |   1 +
>  tests/ref/fate/filter-pixfmts-vflip      |   1 +
>  14 files changed, 183 insertions(+), 154 deletions(-)
>  create mode 100644 tests/ref/fate/filter-pixdesc-ayuv
> 
> diff --git a/libswscale/output.c b/libswscale/output.c
> index c9dfd6f60a..328b108089 100644
> --- a/libswscale/output.c
> +++ b/libswscale/output.c
> @@ -2668,165 +2668,177 @@ yuv2xv36le_X_c(SwsContext *c, const int16_t *lumFilter,
>      }
>  }
>  
> -static void
> -yuv2vuyX_1_c(SwsContext *c, const int16_t *buf0,
> -             const int16_t *ubuf[2], const int16_t *vbuf[2],
> -             const int16_t *abuf0, uint8_t *dest, int dstW,
> -             int uvalpha, int y)
> -{
> -    int hasAlpha = !!abuf0;
> -    int i;
> -
> -    if (uvalpha < 2048) {
> -        for (i = 0; i < dstW; i++) {
> -            int Y = (buf0[i] + 64) >> 7;
> -            int U = (ubuf[0][i] + 64) >> 7;
> -            int V = (vbuf[0][i] + 64) >> 7;
> -            int A = 255;
> -
> -            if (Y & 0x100)
> -                Y = av_clip_uint8(Y);
> -            if (U & 0x100)
> -                U = av_clip_uint8(U);
> -            if (V & 0x100)
> -                V = av_clip_uint8(V);
> -
> -            if (hasAlpha) {
> -                A = (abuf0[i] + 64) >> 7;
> -                if (A & 0x100)
> -                    A = av_clip_uint8(A);
> -            }
> -
> -            dest[4 * i    ] = V;
> -            dest[4 * i + 1] = U;
> -            dest[4 * i + 2] = Y;
> -            dest[4 * i + 3] = A;
> -        }
> -    } else {
> -        for (i = 0; i < dstW; i++) {
> -            int Y = (buf0[i] + 64) >> 7;
> -            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;
> -            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;
> -            int A = 255;
> -
> -            if (Y & 0x100)
> -                Y = av_clip_uint8(Y);
> -            if (U & 0x100)
> -                U = av_clip_uint8(U);
> -            if (V & 0x100)
> -                V = av_clip_uint8(V);
> -
> -            if (hasAlpha) {
> -                A = (abuf0[i] + 64) >> 7;
> -                if (A & 0x100)
> -                    A = av_clip_uint8(A);
> -            }
> -
> -            dest[4 * i    ] = V;
> -            dest[4 * i + 1] = U;
> -            dest[4 * i + 2] = Y;
> -            dest[4 * i + 3] = A;
> -        }
> -    }
> +#define AYUV_1_WRAPPER(fmt, C0, C1, C2, C3)                        \
> +static void                                                        \
> +yuv2 ## fmt ##_1_c(SwsContext *c, const int16_t *buf0,             \
> +                   const int16_t *ubuf[2], const int16_t *vbuf[2], \
> +                   const int16_t *abuf0, uint8_t *dest, int dstW,  \
> +                   int uvalpha, int y)                             \
> +{                                                                  \
> +    int hasAlpha = !!abuf0;                                        \
> +    int i;                                                         \
> +                                                                   \
> +    if (uvalpha < 2048) {                                          \
> +        for (i = 0; i < dstW; i++) {                               \
> +            int Y = (buf0[i] + 64) >> 7;                           \
> +            int U = (ubuf[0][i] + 64) >> 7;                        \
> +            int V = (vbuf[0][i] + 64) >> 7;                        \
> +            int A = 255;                                           \
> +                                                                   \
> +            if (Y & 0x100)                                         \
> +                Y = av_clip_uint8(Y);                              \
> +            if (U & 0x100)                                         \
> +                U = av_clip_uint8(U);                              \
> +            if (V & 0x100)                                         \
> +                V = av_clip_uint8(V);                              \
> +                                                                   \
> +            if (hasAlpha) {                                        \
> +                A = (abuf0[i] + 64) >> 7;                          \
> +                if (A & 0x100)                                     \
> +                    A = av_clip_uint8(A);                          \
> +            }                                                      \
> +                                                                   \
> +            dest[4 * i    ] = (C0);                                \
> +            dest[4 * i + 1] = (C1);                                \
> +            dest[4 * i + 2] = (C2);                                \
> +            dest[4 * i + 3] = (C3);                                \
> +        }                                                          \
> +    } else {                                                       \
> +        for (i = 0; i < dstW; i++) {                               \
> +            int Y = (buf0[i] + 64) >> 7;                           \
> +            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;          \
> +            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;          \
> +            int A = 255;                                           \
> +                                                                   \
> +            if (Y & 0x100)                                         \
> +                Y = av_clip_uint8(Y);                              \
> +            if (U & 0x100)                                         \
> +                U = av_clip_uint8(U);                              \
> +            if (V & 0x100)                                         \
> +                V = av_clip_uint8(V);                              \
> +                                                                   \
> +            if (hasAlpha) {                                        \
> +                A = (abuf0[i] + 64) >> 7;                          \
> +                if (A & 0x100)                                     \
> +                    A = av_clip_uint8(A);                          \
> +            }                                                      \
> +                                                                   \
> +            dest[4 * i    ] = (C0);                                \
> +            dest[4 * i + 1] = (C1);                                \
> +            dest[4 * i + 2] = (C2);                                \
> +            dest[4 * i + 3] = (C3);                                \
> +        }                                                          \
> +    }                                                              \
>  }

Is there an advantage in using huge multiline macros here?

This is ugly and hard-to-maintain code. Simply writing an always-inline function
and trusting that the compiler will inline it should result in more normal
C code and the same result.

(Is it faster, or does it have some other advantage?)

thx

[...]
James Almer Oct. 11, 2024, 10:54 p.m. UTC | #2
On 10/11/2024 7:46 PM, Michael Niedermayer wrote:
> On Tue, Oct 08, 2024 at 07:50:11PM -0300, James Almer wrote:
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>   libswscale/output.c                      | 323 ++++++++++++-----------
>>   libswscale/utils.c                       |   2 +-
>>   tests/ref/fate/filter-pixdesc-ayuv       |   1 +
>>   tests/ref/fate/filter-pixfmts-copy       |   1 +
>>   tests/ref/fate/filter-pixfmts-crop       |   1 +
>>   tests/ref/fate/filter-pixfmts-field      |   1 +
>>   tests/ref/fate/filter-pixfmts-fieldorder |   1 +
>>   tests/ref/fate/filter-pixfmts-hflip      |   1 +
>>   tests/ref/fate/filter-pixfmts-il         |   1 +
>>   tests/ref/fate/filter-pixfmts-null       |   1 +
>>   tests/ref/fate/filter-pixfmts-pad        |   1 +
>>   tests/ref/fate/filter-pixfmts-scale      |   1 +
>>   tests/ref/fate/filter-pixfmts-transpose  |   1 +
>>   tests/ref/fate/filter-pixfmts-vflip      |   1 +
>>   14 files changed, 183 insertions(+), 154 deletions(-)
>>   create mode 100644 tests/ref/fate/filter-pixdesc-ayuv
>>
>> diff --git a/libswscale/output.c b/libswscale/output.c
>> index c9dfd6f60a..328b108089 100644
>> --- a/libswscale/output.c
>> +++ b/libswscale/output.c
>> @@ -2668,165 +2668,177 @@ yuv2xv36le_X_c(SwsContext *c, const int16_t *lumFilter,
>>       }
>>   }
>>   
>> -static void
>> -yuv2vuyX_1_c(SwsContext *c, const int16_t *buf0,
>> -             const int16_t *ubuf[2], const int16_t *vbuf[2],
>> -             const int16_t *abuf0, uint8_t *dest, int dstW,
>> -             int uvalpha, int y)
>> -{
>> -    int hasAlpha = !!abuf0;
>> -    int i;
>> -
>> -    if (uvalpha < 2048) {
>> -        for (i = 0; i < dstW; i++) {
>> -            int Y = (buf0[i] + 64) >> 7;
>> -            int U = (ubuf[0][i] + 64) >> 7;
>> -            int V = (vbuf[0][i] + 64) >> 7;
>> -            int A = 255;
>> -
>> -            if (Y & 0x100)
>> -                Y = av_clip_uint8(Y);
>> -            if (U & 0x100)
>> -                U = av_clip_uint8(U);
>> -            if (V & 0x100)
>> -                V = av_clip_uint8(V);
>> -
>> -            if (hasAlpha) {
>> -                A = (abuf0[i] + 64) >> 7;
>> -                if (A & 0x100)
>> -                    A = av_clip_uint8(A);
>> -            }
>> -
>> -            dest[4 * i    ] = V;
>> -            dest[4 * i + 1] = U;
>> -            dest[4 * i + 2] = Y;
>> -            dest[4 * i + 3] = A;
>> -        }
>> -    } else {
>> -        for (i = 0; i < dstW; i++) {
>> -            int Y = (buf0[i] + 64) >> 7;
>> -            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;
>> -            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;
>> -            int A = 255;
>> -
>> -            if (Y & 0x100)
>> -                Y = av_clip_uint8(Y);
>> -            if (U & 0x100)
>> -                U = av_clip_uint8(U);
>> -            if (V & 0x100)
>> -                V = av_clip_uint8(V);
>> -
>> -            if (hasAlpha) {
>> -                A = (abuf0[i] + 64) >> 7;
>> -                if (A & 0x100)
>> -                    A = av_clip_uint8(A);
>> -            }
>> -
>> -            dest[4 * i    ] = V;
>> -            dest[4 * i + 1] = U;
>> -            dest[4 * i + 2] = Y;
>> -            dest[4 * i + 3] = A;
>> -        }
>> -    }
>> +#define AYUV_1_WRAPPER(fmt, C0, C1, C2, C3)                        \
>> +static void                                                        \
>> +yuv2 ## fmt ##_1_c(SwsContext *c, const int16_t *buf0,             \
>> +                   const int16_t *ubuf[2], const int16_t *vbuf[2], \
>> +                   const int16_t *abuf0, uint8_t *dest, int dstW,  \
>> +                   int uvalpha, int y)                             \
>> +{                                                                  \
>> +    int hasAlpha = !!abuf0;                                        \
>> +    int i;                                                         \
>> +                                                                   \
>> +    if (uvalpha < 2048) {                                          \
>> +        for (i = 0; i < dstW; i++) {                               \
>> +            int Y = (buf0[i] + 64) >> 7;                           \
>> +            int U = (ubuf[0][i] + 64) >> 7;                        \
>> +            int V = (vbuf[0][i] + 64) >> 7;                        \
>> +            int A = 255;                                           \
>> +                                                                   \
>> +            if (Y & 0x100)                                         \
>> +                Y = av_clip_uint8(Y);                              \
>> +            if (U & 0x100)                                         \
>> +                U = av_clip_uint8(U);                              \
>> +            if (V & 0x100)                                         \
>> +                V = av_clip_uint8(V);                              \
>> +                                                                   \
>> +            if (hasAlpha) {                                        \
>> +                A = (abuf0[i] + 64) >> 7;                          \
>> +                if (A & 0x100)                                     \
>> +                    A = av_clip_uint8(A);                          \
>> +            }                                                      \
>> +                                                                   \
>> +            dest[4 * i    ] = (C0);                                \
>> +            dest[4 * i + 1] = (C1);                                \
>> +            dest[4 * i + 2] = (C2);                                \
>> +            dest[4 * i + 3] = (C3);                                \
>> +        }                                                          \
>> +    } else {                                                       \
>> +        for (i = 0; i < dstW; i++) {                               \
>> +            int Y = (buf0[i] + 64) >> 7;                           \
>> +            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;          \
>> +            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;          \
>> +            int A = 255;                                           \
>> +                                                                   \
>> +            if (Y & 0x100)                                         \
>> +                Y = av_clip_uint8(Y);                              \
>> +            if (U & 0x100)                                         \
>> +                U = av_clip_uint8(U);                              \
>> +            if (V & 0x100)                                         \
>> +                V = av_clip_uint8(V);                              \
>> +                                                                   \
>> +            if (hasAlpha) {                                        \
>> +                A = (abuf0[i] + 64) >> 7;                          \
>> +                if (A & 0x100)                                     \
>> +                    A = av_clip_uint8(A);                          \
>> +            }                                                      \
>> +                                                                   \
>> +            dest[4 * i    ] = (C0);                                \
>> +            dest[4 * i + 1] = (C1);                                \
>> +            dest[4 * i + 2] = (C2);                                \
>> +            dest[4 * i + 3] = (C3);                                \
>> +        }                                                          \
>> +    }                                                              \
>>   }
> 
> Is there an advantage in using huge multiline macros here ?
> 
> This is ugly and hard to maintain code. Simply writing a always inline function
> and trusting that the compiler will inline it should result in more normal
> C code and the same result
> 
> (is it faster ? or has some other advanatge ?)

No, I just figured I'd do it like this. I can make it an always-inline
function.
Michael Niedermayer Oct. 11, 2024, 11:05 p.m. UTC | #3
On Fri, Oct 11, 2024 at 07:54:48PM -0300, James Almer wrote:
> On 10/11/2024 7:46 PM, Michael Niedermayer wrote:
> > On Tue, Oct 08, 2024 at 07:50:11PM -0300, James Almer wrote:
> > > Signed-off-by: James Almer <jamrial@gmail.com>
> > > ---
> > >   libswscale/output.c                      | 323 ++++++++++++-----------
> > >   libswscale/utils.c                       |   2 +-
> > >   tests/ref/fate/filter-pixdesc-ayuv       |   1 +
> > >   tests/ref/fate/filter-pixfmts-copy       |   1 +
> > >   tests/ref/fate/filter-pixfmts-crop       |   1 +
> > >   tests/ref/fate/filter-pixfmts-field      |   1 +
> > >   tests/ref/fate/filter-pixfmts-fieldorder |   1 +
> > >   tests/ref/fate/filter-pixfmts-hflip      |   1 +
> > >   tests/ref/fate/filter-pixfmts-il         |   1 +
> > >   tests/ref/fate/filter-pixfmts-null       |   1 +
> > >   tests/ref/fate/filter-pixfmts-pad        |   1 +
> > >   tests/ref/fate/filter-pixfmts-scale      |   1 +
> > >   tests/ref/fate/filter-pixfmts-transpose  |   1 +
> > >   tests/ref/fate/filter-pixfmts-vflip      |   1 +
> > >   14 files changed, 183 insertions(+), 154 deletions(-)
> > >   create mode 100644 tests/ref/fate/filter-pixdesc-ayuv
> > > 
> > > diff --git a/libswscale/output.c b/libswscale/output.c
> > > index c9dfd6f60a..328b108089 100644
> > > --- a/libswscale/output.c
> > > +++ b/libswscale/output.c
> > > @@ -2668,165 +2668,177 @@ yuv2xv36le_X_c(SwsContext *c, const int16_t *lumFilter,
> > >       }
> > >   }
> > > -static void
> > > -yuv2vuyX_1_c(SwsContext *c, const int16_t *buf0,
> > > -             const int16_t *ubuf[2], const int16_t *vbuf[2],
> > > -             const int16_t *abuf0, uint8_t *dest, int dstW,
> > > -             int uvalpha, int y)
> > > -{
> > > -    int hasAlpha = !!abuf0;
> > > -    int i;
> > > -
> > > -    if (uvalpha < 2048) {
> > > -        for (i = 0; i < dstW; i++) {
> > > -            int Y = (buf0[i] + 64) >> 7;
> > > -            int U = (ubuf[0][i] + 64) >> 7;
> > > -            int V = (vbuf[0][i] + 64) >> 7;
> > > -            int A = 255;
> > > -
> > > -            if (Y & 0x100)
> > > -                Y = av_clip_uint8(Y);
> > > -            if (U & 0x100)
> > > -                U = av_clip_uint8(U);
> > > -            if (V & 0x100)
> > > -                V = av_clip_uint8(V);
> > > -
> > > -            if (hasAlpha) {
> > > -                A = (abuf0[i] + 64) >> 7;
> > > -                if (A & 0x100)
> > > -                    A = av_clip_uint8(A);
> > > -            }
> > > -
> > > -            dest[4 * i    ] = V;
> > > -            dest[4 * i + 1] = U;
> > > -            dest[4 * i + 2] = Y;
> > > -            dest[4 * i + 3] = A;
> > > -        }
> > > -    } else {
> > > -        for (i = 0; i < dstW; i++) {
> > > -            int Y = (buf0[i] + 64) >> 7;
> > > -            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;
> > > -            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;
> > > -            int A = 255;
> > > -
> > > -            if (Y & 0x100)
> > > -                Y = av_clip_uint8(Y);
> > > -            if (U & 0x100)
> > > -                U = av_clip_uint8(U);
> > > -            if (V & 0x100)
> > > -                V = av_clip_uint8(V);
> > > -
> > > -            if (hasAlpha) {
> > > -                A = (abuf0[i] + 64) >> 7;
> > > -                if (A & 0x100)
> > > -                    A = av_clip_uint8(A);
> > > -            }
> > > -
> > > -            dest[4 * i    ] = V;
> > > -            dest[4 * i + 1] = U;
> > > -            dest[4 * i + 2] = Y;
> > > -            dest[4 * i + 3] = A;
> > > -        }
> > > -    }
> > > +#define AYUV_1_WRAPPER(fmt, C0, C1, C2, C3)                        \
> > > +static void                                                        \
> > > +yuv2 ## fmt ##_1_c(SwsContext *c, const int16_t *buf0,             \
> > > +                   const int16_t *ubuf[2], const int16_t *vbuf[2], \
> > > +                   const int16_t *abuf0, uint8_t *dest, int dstW,  \
> > > +                   int uvalpha, int y)                             \
> > > +{                                                                  \
> > > +    int hasAlpha = !!abuf0;                                        \
> > > +    int i;                                                         \
> > > +                                                                   \
> > > +    if (uvalpha < 2048) {                                          \
> > > +        for (i = 0; i < dstW; i++) {                               \
> > > +            int Y = (buf0[i] + 64) >> 7;                           \
> > > +            int U = (ubuf[0][i] + 64) >> 7;                        \
> > > +            int V = (vbuf[0][i] + 64) >> 7;                        \
> > > +            int A = 255;                                           \
> > > +                                                                   \
> > > +            if (Y & 0x100)                                         \
> > > +                Y = av_clip_uint8(Y);                              \
> > > +            if (U & 0x100)                                         \
> > > +                U = av_clip_uint8(U);                              \
> > > +            if (V & 0x100)                                         \
> > > +                V = av_clip_uint8(V);                              \
> > > +                                                                   \
> > > +            if (hasAlpha) {                                        \
> > > +                A = (abuf0[i] + 64) >> 7;                          \
> > > +                if (A & 0x100)                                     \
> > > +                    A = av_clip_uint8(A);                          \
> > > +            }                                                      \
> > > +                                                                   \
> > > +            dest[4 * i    ] = (C0);                                \
> > > +            dest[4 * i + 1] = (C1);                                \
> > > +            dest[4 * i + 2] = (C2);                                \
> > > +            dest[4 * i + 3] = (C3);                                \
> > > +        }                                                          \
> > > +    } else {                                                       \
> > > +        for (i = 0; i < dstW; i++) {                               \
> > > +            int Y = (buf0[i] + 64) >> 7;                           \
> > > +            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;          \
> > > +            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;          \
> > > +            int A = 255;                                           \
> > > +                                                                   \
> > > +            if (Y & 0x100)                                         \
> > > +                Y = av_clip_uint8(Y);                              \
> > > +            if (U & 0x100)                                         \
> > > +                U = av_clip_uint8(U);                              \
> > > +            if (V & 0x100)                                         \
> > > +                V = av_clip_uint8(V);                              \
> > > +                                                                   \
> > > +            if (hasAlpha) {                                        \
> > > +                A = (abuf0[i] + 64) >> 7;                          \
> > > +                if (A & 0x100)                                     \
> > > +                    A = av_clip_uint8(A);                          \
> > > +            }                                                      \
> > > +                                                                   \
> > > +            dest[4 * i    ] = (C0);                                \
> > > +            dest[4 * i + 1] = (C1);                                \
> > > +            dest[4 * i + 2] = (C2);                                \
> > > +            dest[4 * i + 3] = (C3);                                \
> > > +        }                                                          \
> > > +    }                                                              \
> > >   }
> > 
> > Is there an advantage in using huge multiline macros here ?
> > 
> > This is ugly and hard to maintain code. Simply writing a always inline function
> > and trusting that the compiler will inline it should result in more normal
> > C code and the same result
> > 
> > (is it faster ? or has some other advanatge ?)
> 
> No, just figured doing it like this. I can make it an always inline
> function.

Please do. We have a few slightly different ways it's done currently; here's one example
(and in this example, in fact, output_pixels could itself be replaced by a function,
 which would probably be cleaner too).

In fact everything can be cleaned up, and I certainly would love to see someone
have a brilliant idea to make it cleaner with no disadvantages ...

/*
 * Store one group of four packed 4:2:2 bytes (two luma samples plus one U and
 * one V sample) at dest[pos .. pos + 3].
 *
 * The byte order is selected by `target`:
 *   AV_PIX_FMT_YUYV422 -> Y1 U  Y2 V
 *   AV_PIX_FMT_YVYU422 -> Y1 V  Y2 U
 *   anything else      -> U  Y1 V  Y2   (used for AV_PIX_FMT_UYVY422)
 *
 * The macro is not self-contained: it reads `dest` and `target` from the
 * scope in which it is expanded.
 *
 * It is wrapped in do { } while (0) so that it behaves as exactly one
 * statement (safe inside an unbraced if/else), and every argument is
 * parenthesized so that expressions such as `a ? b : c` can be passed as
 * `pos` without changing which byte is addressed.
 */
#define output_pixels(pos, Y1, U, Y2, V) \
    do { \
        if (target == AV_PIX_FMT_YUYV422) { \
            dest[(pos) + 0] = (Y1); \
            dest[(pos) + 1] = (U);  \
            dest[(pos) + 2] = (Y2); \
            dest[(pos) + 3] = (V);  \
        } else if (target == AV_PIX_FMT_YVYU422) { \
            dest[(pos) + 0] = (Y1); \
            dest[(pos) + 1] = (V);  \
            dest[(pos) + 2] = (Y2); \
            dest[(pos) + 3] = (U);  \
        } else { /* AV_PIX_FMT_UYVY422 */ \
            dest[(pos) + 0] = (U);  \
            dest[(pos) + 1] = (Y1); \
            dest[(pos) + 2] = (V);  \
            dest[(pos) + 3] = (Y2); \
        } \
    } while (0)

/**
 * Multi-tap output template for packed 8-bit 4:2:2 formats.
 *
 * Every iteration produces two consecutive luma samples and the one U/V pair
 * that goes with them. Each sample is the sum of the source rows weighted by
 * the matching filter coefficients, starting from a bias of 1 << 18 and
 * shifted right by 19 afterwards. The four results are clipped to 8 bits when
 * any of them has bit 8 set, then stored through output_pixels(), which picks
 * the byte order from target.
 *
 * @param lumFilter     luma coefficients, lumFilterSize entries
 * @param lumSrc        luma source rows, lumFilterSize entries
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma coefficients, chrFilterSize entries
 * @param chrUSrc       U source rows, chrFilterSize entries
 * @param chrVSrc       V source rows, chrFilterSize entries
 * @param chrFilterSize number of chroma taps
 * @param dest          output line, written 4 bytes per pixel pair
 * @param dstW          output width in pixels; (dstW + 1) >> 1 pairs are written
 * @param target        packed format selecting the byte order
 *
 * c, alpSrc and y are not used by this template.
 */
static av_always_inline void
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum AVPixelFormat target)
{
    const int pairs = (dstW + 1) >> 1;
    int px;

    for (px = 0; px < pairs; px++) {
        /* Accumulators start at half of the final divisor (1 << 19). */
        int luma0 = 1 << 18, luma1 = 1 << 18;
        int cb    = 1 << 18, cr    = 1 << 18;
        int tap;

        for (tap = 0; tap < lumFilterSize; tap++) {
            const int coeff = lumFilter[tap];

            luma0 += lumSrc[tap][2 * px]     * coeff;
            luma1 += lumSrc[tap][2 * px + 1] * coeff;
        }
        for (tap = 0; tap < chrFilterSize; tap++) {
            const int coeff = chrFilter[tap];

            cb += chrUSrc[tap][px] * coeff;
            cr += chrVSrc[tap][px] * coeff;
        }

        luma0 >>= 19;
        luma1 >>= 19;
        cb    >>= 19;
        cr    >>= 19;

        /* One combined test decides whether all four values get clipped. */
        if ((luma0 | luma1 | cb | cr) & 0x100) {
            luma0 = av_clip_uint8(luma0);
            luma1 = av_clip_uint8(luma1);
            cb    = av_clip_uint8(cb);
            cr    = av_clip_uint8(cr);
        }

        output_pixels(4 * px, luma0, cb, luma1, cr);
    }
}

/**
 * Two-line blending output template for packed 8-bit 4:2:2 formats.
 *
 * Each output sample is a weighted sum of the same position in two source
 * lines: line 0 is weighted by (4096 - alpha) and line 1 by alpha, and the sum
 * is shifted right by 19. Luma uses yalpha, chroma uses uvalpha. The four
 * values of a pixel pair are clipped to 8 bits when any of them has bit 8 set,
 * then stored through output_pixels(), which picks the byte order from target.
 *
 * @param buf     the two luma source lines
 * @param ubuf    the two U source lines
 * @param vbuf    the two V source lines
 * @param dest    output line, written 4 bytes per pixel pair
 * @param dstW    output width in pixels; (dstW + 1) >> 1 pairs are written
 * @param yalpha  weight of buf[1], asserted to be <= 4096
 * @param uvalpha weight of ubuf[1] / vbuf[1], asserted to be <= 4096
 * @param target  packed format selecting the byte order
 *
 * c, abuf and y are not used by this template.
 */
static av_always_inline void
yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum AVPixelFormat target)
{
    const int16_t *lumaA = buf[0],  *lumaB = buf[1];
    const int16_t *cbA   = ubuf[0], *cbB   = ubuf[1];
    const int16_t *crA   = vbuf[0], *crB   = vbuf[1];
    const int pairs      = (dstW + 1) >> 1;
    int lumaWeightA      = 4096 - yalpha;
    int chromaWeightA    = 4096 - uvalpha;
    int px;

    av_assert2(yalpha  <= 4096U);
    av_assert2(uvalpha <= 4096U);

    for (px = 0; px < pairs; px++) {
        const int even = px * 2;
        const int odd  = px * 2 + 1;
        int luma0 = (lumaA[even] * lumaWeightA   + lumaB[even] * yalpha)  >> 19;
        int luma1 = (lumaA[odd]  * lumaWeightA   + lumaB[odd]  * yalpha)  >> 19;
        int cb    = (cbA[px]     * chromaWeightA + cbB[px]     * uvalpha) >> 19;
        int cr    = (crA[px]     * chromaWeightA + crB[px]     * uvalpha) >> 19;

        /* One combined test decides whether all four values get clipped. */
        if ((luma0 | luma1 | cb | cr) & 0x100) {
            luma0 = av_clip_uint8(luma0);
            luma1 = av_clip_uint8(luma1);
            cb    = av_clip_uint8(cb);
            cr    = av_clip_uint8(cr);
        }

        output_pixels(px * 4, luma0, cb, luma1, cr);
    }
}

/**
 * Single-luma-line output template for packed 8-bit 4:2:2 formats.
 *
 * Luma is always taken from buf0 as (sample + 64) >> 7. Chroma depends on
 * uvalpha:
 *  - uvalpha <  2048: only ubuf[0] / vbuf[0] are read, as (sample + 64) >> 7;
 *  - uvalpha >= 2048: the two chroma lines are summed, as (a + b + 128) >> 8.
 *
 * The four values of a pixel pair are clipped to 8 bits when any of them has
 * bit 8 set, then stored through output_pixels(), which picks the byte order
 * from target.
 *
 * @param buf0    luma source line
 * @param ubuf    the two U source lines (ubuf[1] is read only if uvalpha >= 2048)
 * @param vbuf    the two V source lines (vbuf[1] is read only if uvalpha >= 2048)
 * @param dest    output line, written 4 bytes per pixel pair
 * @param dstW    output width in pixels; (dstW + 1) >> 1 pairs are written
 * @param uvalpha selects between the two chroma paths described above
 * @param target  packed format selecting the byte order
 *
 * c, abuf0 and y are not used by this template.
 */
static av_always_inline void
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum AVPixelFormat target)
{
    const int16_t *cbA = ubuf[0], *crA = vbuf[0];
    const int pairs    = (dstW + 1) >> 1;
    int px;

    if (uvalpha >= 2048) {
        /* Chroma comes from the sum of both lines. */
        const int16_t *cbB = ubuf[1], *crB = vbuf[1];

        for (px = 0; px < pairs; px++) {
            int luma0 = (buf0[px * 2]     +  64) >> 7;
            int luma1 = (buf0[px * 2 + 1] +  64) >> 7;
            int cb    = (cbA[px] + cbB[px] + 128) >> 8;
            int cr    = (crA[px] + crB[px] + 128) >> 8;

            if ((luma0 | luma1 | cb | cr) & 0x100) {
                luma0 = av_clip_uint8(luma0);
                luma1 = av_clip_uint8(luma1);
                cb    = av_clip_uint8(cb);
                cr    = av_clip_uint8(cr);
            }

            output_pixels(px * 4, luma0, cb, luma1, cr);
        }
        return;
    }

    /* Chroma comes from the first line only. */
    for (px = 0; px < pairs; px++) {
        int luma0 = (buf0[px * 2]     + 64) >> 7;
        int luma1 = (buf0[px * 2 + 1] + 64) >> 7;
        int cb    = (cbA[px]          + 64) >> 7;
        int cr    = (crA[px]          + 64) >> 7;

        if ((luma0 | luma1 | cb | cr) & 0x100) {
            luma0 = av_clip_uint8(luma0);
            luma1 = av_clip_uint8(luma1);
            cb    = av_clip_uint8(cb);
            cr    = av_clip_uint8(cr);
        }

        output_pixels(px * 4, luma0, cb, luma1, cr);
    }
}
[...]
diff mbox series

Patch

diff --git a/libswscale/output.c b/libswscale/output.c
index c9dfd6f60a..328b108089 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2668,165 +2668,177 @@  yuv2xv36le_X_c(SwsContext *c, const int16_t *lumFilter,
     }
 }
 
-static void
-yuv2vuyX_1_c(SwsContext *c, const int16_t *buf0,
-             const int16_t *ubuf[2], const int16_t *vbuf[2],
-             const int16_t *abuf0, uint8_t *dest, int dstW,
-             int uvalpha, int y)
-{
-    int hasAlpha = !!abuf0;
-    int i;
-
-    if (uvalpha < 2048) {
-        for (i = 0; i < dstW; i++) {
-            int Y = (buf0[i] + 64) >> 7;
-            int U = (ubuf[0][i] + 64) >> 7;
-            int V = (vbuf[0][i] + 64) >> 7;
-            int A = 255;
-
-            if (Y & 0x100)
-                Y = av_clip_uint8(Y);
-            if (U & 0x100)
-                U = av_clip_uint8(U);
-            if (V & 0x100)
-                V = av_clip_uint8(V);
-
-            if (hasAlpha) {
-                A = (abuf0[i] + 64) >> 7;
-                if (A & 0x100)
-                    A = av_clip_uint8(A);
-            }
-
-            dest[4 * i    ] = V;
-            dest[4 * i + 1] = U;
-            dest[4 * i + 2] = Y;
-            dest[4 * i + 3] = A;
-        }
-    } else {
-        for (i = 0; i < dstW; i++) {
-            int Y = (buf0[i] + 64) >> 7;
-            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;
-            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;
-            int A = 255;
-
-            if (Y & 0x100)
-                Y = av_clip_uint8(Y);
-            if (U & 0x100)
-                U = av_clip_uint8(U);
-            if (V & 0x100)
-                V = av_clip_uint8(V);
-
-            if (hasAlpha) {
-                A = (abuf0[i] + 64) >> 7;
-                if (A & 0x100)
-                    A = av_clip_uint8(A);
-            }
-
-            dest[4 * i    ] = V;
-            dest[4 * i + 1] = U;
-            dest[4 * i + 2] = Y;
-            dest[4 * i + 3] = A;
-        }
-    }
+#define AYUV_1_WRAPPER(fmt, C0, C1, C2, C3)                        \
+static void                                                        \
+yuv2 ## fmt ##_1_c(SwsContext *c, const int16_t *buf0,             \
+                   const int16_t *ubuf[2], const int16_t *vbuf[2], \
+                   const int16_t *abuf0, uint8_t *dest, int dstW,  \
+                   int uvalpha, int y)                             \
+{                                                                  \
+    int hasAlpha = !!abuf0;                                        \
+    int i;                                                         \
+                                                                   \
+    if (uvalpha < 2048) {                                          \
+        for (i = 0; i < dstW; i++) {                               \
+            int Y = (buf0[i] + 64) >> 7;                           \
+            int U = (ubuf[0][i] + 64) >> 7;                        \
+            int V = (vbuf[0][i] + 64) >> 7;                        \
+            int A = 255;                                           \
+                                                                   \
+            if (Y & 0x100)                                         \
+                Y = av_clip_uint8(Y);                              \
+            if (U & 0x100)                                         \
+                U = av_clip_uint8(U);                              \
+            if (V & 0x100)                                         \
+                V = av_clip_uint8(V);                              \
+                                                                   \
+            if (hasAlpha) {                                        \
+                A = (abuf0[i] + 64) >> 7;                          \
+                if (A & 0x100)                                     \
+                    A = av_clip_uint8(A);                          \
+            }                                                      \
+                                                                   \
+            dest[4 * i    ] = (C0);                                \
+            dest[4 * i + 1] = (C1);                                \
+            dest[4 * i + 2] = (C2);                                \
+            dest[4 * i + 3] = (C3);                                \
+        }                                                          \
+    } else {                                                       \
+        for (i = 0; i < dstW; i++) {                               \
+            int Y = (buf0[i] + 64) >> 7;                           \
+            int U = (ubuf[0][i] + ubuf[1][i] + 128) >> 8;          \
+            int V = (vbuf[0][i] + vbuf[1][i] + 128) >> 8;          \
+            int A = 255;                                           \
+                                                                   \
+            if (Y & 0x100)                                         \
+                Y = av_clip_uint8(Y);                              \
+            if (U & 0x100)                                         \
+                U = av_clip_uint8(U);                              \
+            if (V & 0x100)                                         \
+                V = av_clip_uint8(V);                              \
+                                                                   \
+            if (hasAlpha) {                                        \
+                A = (abuf0[i] + 64) >> 7;                          \
+                if (A & 0x100)                                     \
+                    A = av_clip_uint8(A);                          \
+            }                                                      \
+                                                                   \
+            dest[4 * i    ] = (C0);                                \
+            dest[4 * i + 1] = (C1);                                \
+            dest[4 * i + 2] = (C2);                                \
+            dest[4 * i + 3] = (C3);                                \
+        }                                                          \
+    }                                                              \
 }
 
-static void
-yuv2vuyX_2_c(SwsContext *c, const int16_t *buf[2],
-            const int16_t *ubuf[2], const int16_t *vbuf[2],
-            const int16_t *abuf[2], uint8_t *dest, int dstW,
-            int yalpha, int uvalpha, int y)
-{
-    int hasAlpha = abuf && abuf[0] && abuf[1];
-    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
-                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
-                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
-                  *abuf0 = hasAlpha ? abuf[0] : NULL,
-                  *abuf1 = hasAlpha ? abuf[1] : NULL;
-    int yalpha1  = 4096 - yalpha;
-    int uvalpha1 = 4096 - uvalpha;
-    int i;
-
-    av_assert2(yalpha  <= 4096U);
-    av_assert2(uvalpha <= 4096U);
-
-    for (i = 0; i < dstW; i++) {
-        int Y = (buf0[i]  * yalpha1  + buf1[i]  * yalpha)  >> 19;
-        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
-        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
-        int A = 255;
-
-        if (Y & 0x100)
-            Y = av_clip_uint8(Y);
-        if (U & 0x100)
-            U = av_clip_uint8(U);
-        if (V & 0x100)
-            V = av_clip_uint8(V);
-
-        if (hasAlpha) {
-            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 19;
-            A = av_clip_uint8(A);
-        }
-
-        dest[4 * i    ] = V;
-        dest[4 * i + 1] = U;
-        dest[4 * i + 2] = Y;
-        dest[4 * i + 3] = A;
-    }
+AYUV_1_WRAPPER(vuyX, V, U, Y, A)
+AYUV_1_WRAPPER(ayuv, A, Y, U, V)
+
+#define AYUV_2_WRAPPER(fmt, C0, C1, C2, C3)                         \
+static void                                                         \
+yuv2 ## fmt ##_2_c(SwsContext *c, const int16_t *buf[2],            \
+                   const int16_t *ubuf[2], const int16_t *vbuf[2],  \
+                   const int16_t *abuf[2], uint8_t *dest, int dstW, \
+                   int yalpha, int uvalpha, int y)                  \
+{                                                                   \
+    int hasAlpha = abuf && abuf[0] && abuf[1];                      \
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],                \
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],               \
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],               \
+                  *abuf0 = hasAlpha ? abuf[0] : NULL,               \
+                  *abuf1 = hasAlpha ? abuf[1] : NULL;               \
+    int yalpha1  = 4096 - yalpha;                                   \
+    int uvalpha1 = 4096 - uvalpha;                                  \
+    int i;                                                          \
+                                                                    \
+    av_assert2(yalpha  <= 4096U);                                   \
+    av_assert2(uvalpha <= 4096U);                                   \
+                                                                    \
+    for (i = 0; i < dstW; i++) {                                    \
+        int Y = (buf0[i]  * yalpha1  + buf1[i]  * yalpha)  >> 19;   \
+        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;   \
+        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;   \
+        int A = 255;                                                \
+                                                                    \
+        if (Y & 0x100)                                              \
+            Y = av_clip_uint8(Y);                                   \
+        if (U & 0x100)                                              \
+            U = av_clip_uint8(U);                                   \
+        if (V & 0x100)                                              \
+            V = av_clip_uint8(V);                                   \
+                                                                    \
+        if (hasAlpha) {                                             \
+            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 19;     \
+            A = av_clip_uint8(A);                                   \
+        }                                                           \
+                                                                    \
+        dest[4 * i    ] = (C0);                                     \
+        dest[4 * i + 1] = (C1);                                     \
+        dest[4 * i + 2] = (C2);                                     \
+        dest[4 * i + 3] = (C3);                                     \
+    }                                                               \
 }
 
-static void
-yuv2vuyX_X_c(SwsContext *c, const int16_t *lumFilter,
-             const int16_t **lumSrc, int lumFilterSize,
-             const int16_t *chrFilter, const int16_t **chrUSrc,
-             const int16_t **chrVSrc, int chrFilterSize,
-             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
-{
-    int i;
-
-    for (i = 0; i < dstW; i++) {
-        int j;
-        int Y = 1 << 18, U = 1 << 18;
-        int V = 1 << 18, A = 255;
-
-        for (j = 0; j < lumFilterSize; j++)
-            Y += lumSrc[j][i] * lumFilter[j];
-
-        for (j = 0; j < chrFilterSize; j++)
-            U += chrUSrc[j][i] * chrFilter[j];
-
-        for (j = 0; j < chrFilterSize; j++)
-            V += chrVSrc[j][i] * chrFilter[j];
-
-        Y >>= 19;
-        U >>= 19;
-        V >>= 19;
-
-        if (Y  & 0x100)
-            Y = av_clip_uint8(Y);
-        if (U  & 0x100)
-            U = av_clip_uint8(U);
-        if (V  & 0x100)
-            V = av_clip_uint8(V);
-
-        if (alpSrc) {
-            A = 1 << 18;
-
-            for (j = 0; j < lumFilterSize; j++)
-                A += alpSrc[j][i] * lumFilter[j];
-
-            A >>= 19;
-
-            if (A & 0x100)
-                A = av_clip_uint8(A);
-        }
-
-        dest[4 * i    ] = V;
-        dest[4 * i + 1] = U;
-        dest[4 * i + 2] = Y;
-        dest[4 * i + 3] = A;
-    }
+AYUV_2_WRAPPER(vuyX, V, U, Y, A)
+AYUV_2_WRAPPER(ayuv, A, Y, U, V)
+
+#define AYUV_X_WRAPPER(fmt, C0, C1, C2, C3)                           \
+static void                                                           \
+yuv2 ## fmt ##_X_c(SwsContext *c, const int16_t *lumFilter,           \
+             const int16_t **lumSrc, int lumFilterSize,               \
+             const int16_t *chrFilter, const int16_t **chrUSrc,       \
+             const int16_t **chrVSrc, int chrFilterSize,              \
+             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)  \
+{                                                                     \
+    int i;                                                            \
+                                                                      \
+    for (i = 0; i < dstW; i++) {                                      \
+        int j;                                                        \
+        int Y = 1 << 18, U = 1 << 18;                                 \
+        int V = 1 << 18, A = 255;                                     \
+                                                                      \
+        for (j = 0; j < lumFilterSize; j++)                           \
+            Y += lumSrc[j][i] * lumFilter[j];                         \
+                                                                      \
+        for (j = 0; j < chrFilterSize; j++)                           \
+            U += chrUSrc[j][i] * chrFilter[j];                        \
+                                                                      \
+        for (j = 0; j < chrFilterSize; j++)                           \
+            V += chrVSrc[j][i] * chrFilter[j];                        \
+                                                                      \
+        Y >>= 19;                                                     \
+        U >>= 19;                                                     \
+        V >>= 19;                                                     \
+                                                                      \
+        if (Y  & 0x100)                                               \
+            Y = av_clip_uint8(Y);                                     \
+        if (U  & 0x100)                                               \
+            U = av_clip_uint8(U);                                     \
+        if (V  & 0x100)                                               \
+            V = av_clip_uint8(V);                                     \
+                                                                      \
+        if (alpSrc) {                                                 \
+            A = 1 << 18;                                              \
+                                                                      \
+            for (j = 0; j < lumFilterSize; j++)                       \
+                A += alpSrc[j][i] * lumFilter[j];                     \
+                                                                      \
+            A >>= 19;                                                 \
+                                                                      \
+            if (A & 0x100)                                            \
+                A = av_clip_uint8(A);                                 \
+        }                                                             \
+                                                                      \
+        dest[4 * i    ] = (C0);                                       \
+        dest[4 * i + 1] = (C1);                                       \
+        dest[4 * i + 2] = (C2);                                       \
+        dest[4 * i + 3] = (C3);                                       \
+    }                                                                 \
 }
 
+AYUV_X_WRAPPER(vuyX, V, U, Y, A)
+AYUV_X_WRAPPER(ayuv, A, Y, U, V)
+
 #define output_pixel(pos, val, bits) \
     AV_WL16(pos, av_clip_uintp2(val >> shift, bits) << output_shift);
 
@@ -3379,6 +3391,11 @@  av_cold void ff_sws_init_output_funcs(SwsContext *c,
     case AV_PIX_FMT_AYUV64LE:
         *yuv2packedX = yuv2ayuv64le_X_c;
         break;
+    case AV_PIX_FMT_AYUV:
+        *yuv2packed1 = yuv2ayuv_1_c;
+        *yuv2packed2 = yuv2ayuv_2_c;
+        *yuv2packedX = yuv2ayuv_X_c;
+        break;
     case AV_PIX_FMT_VUYA:
     case AV_PIX_FMT_VUYX:
         *yuv2packed1 = yuv2vuyX_1_c;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index e2ad71a38d..84d23c3ff2 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -268,7 +268,7 @@  static const FormatEntry format_entries[] = {
     [AV_PIX_FMT_RGBAF16LE]   = { 1, 0 },
     [AV_PIX_FMT_XV30LE]      = { 1, 1 },
     [AV_PIX_FMT_XV36LE]      = { 1, 1 },
-    [AV_PIX_FMT_AYUV]        = { 1, 0 },
+    [AV_PIX_FMT_AYUV]        = { 1, 1 },
     [AV_PIX_FMT_UYVA]        = { 1, 0 },
     [AV_PIX_FMT_VYU444]      = { 1, 0 },
     [AV_PIX_FMT_V30XLE]      = { 1, 0 },
diff --git a/tests/ref/fate/filter-pixdesc-ayuv b/tests/ref/fate/filter-pixdesc-ayuv
new file mode 100644
index 0000000000..178e847222
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-ayuv
@@ -0,0 +1 @@ 
+pixdesc-ayuv        a21ac760efdec0065bcf605f4ed75f7f
diff --git a/tests/ref/fate/filter-pixfmts-copy b/tests/ref/fate/filter-pixfmts-copy
index 120129dc1e..dc165b5ec5 100644
--- a/tests/ref/fate/filter-pixfmts-copy
+++ b/tests/ref/fate/filter-pixfmts-copy
@@ -2,6 +2,7 @@ 
 0rgb                527ef3d164c8fd0700493733959689c2
 abgr                023ecf6396d324edb113e4a483b79ba2
 argb                f003b555ef429222005d33844cca9325
+ayuv                eb7e43cfbb961d1e369311d0f58b9f52
 ayuv64le            07b9c969dfbe4add4c0626773b151d4f
 bgr0                6fcd67c8e6cec723dab21c70cf53dc16
 bgr24               4cff3814819f02ecf5824edfd768d2b1
diff --git a/tests/ref/fate/filter-pixfmts-crop b/tests/ref/fate/filter-pixfmts-crop
index 4731e96fc0..07ded708bc 100644
--- a/tests/ref/fate/filter-pixfmts-crop
+++ b/tests/ref/fate/filter-pixfmts-crop
@@ -2,6 +2,7 @@ 
 0rgb                974833c777e6abe6d84dc59af2ca5625
 abgr                1d21f5b8a20186ac9dd54459c986a2a7
 argb                8b822972049a1e207000763f2564d6e0
+ayuv                f2846cdd8fc01820e561fe4d3855938c
 ayuv64le            ab2f7bc8f150af47c42c778e3ea28bce
 bgr0                38a84849a9198667c348c686802e3b52
 bgr24               1dacd8e04bf0eff163e82250d01a9cc7
diff --git a/tests/ref/fate/filter-pixfmts-field b/tests/ref/fate/filter-pixfmts-field
index 0727d733f2..9e1e06bd80 100644
--- a/tests/ref/fate/filter-pixfmts-field
+++ b/tests/ref/fate/filter-pixfmts-field
@@ -2,6 +2,7 @@ 
 0rgb                e2c35753a2271d1f9455b1809bc0e907
 abgr                c0eb95959edf5d40ff8af315e62d0f8a
 argb                6dca4f2987b49b7d63f702d17bace630
+ayuv                6aeadbc3ec2a6ff2bfab9495e7285fed
 ayuv64le            d9836decca6323ba88b3b3d02257c0b6
 bgr0                1da3fdbac616b3b410d081e39ed7a1f6
 bgr24               573c76d77b1cbe6534ea7c0267dc1b13
diff --git a/tests/ref/fate/filter-pixfmts-fieldorder b/tests/ref/fate/filter-pixfmts-fieldorder
index 1d7a98ce11..8f1febf4ad 100644
--- a/tests/ref/fate/filter-pixfmts-fieldorder
+++ b/tests/ref/fate/filter-pixfmts-fieldorder
@@ -2,6 +2,7 @@ 
 0rgb                2b0f066cfa0bef378a492875d541de8f
 abgr                832924b5351361db68dbdbb96c60ae55
 argb                80d08e68cb91bc8f2f817516e65f0bd0
+ayuv                61b23fc754d8ca0d6740fa08a0a6ebf2
 ayuv64le            84ef6260fe02427da946d4a2207fb54c
 bgr0                d2c676224ea80ac3ce01afde325ea1a0
 bgr24               b7fdbcd10f20e6ea2d40aae0f329f80d
diff --git a/tests/ref/fate/filter-pixfmts-hflip b/tests/ref/fate/filter-pixfmts-hflip
index 43404db154..a49abba15b 100644
--- a/tests/ref/fate/filter-pixfmts-hflip
+++ b/tests/ref/fate/filter-pixfmts-hflip
@@ -2,6 +2,7 @@ 
 0rgb                ada57572ee2b35f86edac9b911ce8523
 abgr                d2da6c3ee72e4a89a7cd011dd08566b2
 argb                36cf791c52c5463bfc52a070de54337e
+ayuv                122f591eac5669b658c22b09355e4263
 ayuv64le            4cedbc38b3d4dcb26cdab170ce6d667b
 bgr0                66e9fda4e658d73bfe4fc9d792542271
 bgr24               db074979bd684ca4547e28681ad3f6ab
diff --git a/tests/ref/fate/filter-pixfmts-il b/tests/ref/fate/filter-pixfmts-il
index 4a6f15f074..1ac9cf3bd2 100644
--- a/tests/ref/fate/filter-pixfmts-il
+++ b/tests/ref/fate/filter-pixfmts-il
@@ -2,6 +2,7 @@ 
 0rgb                53efe0182723cd1dedfdbf56357c76f5
 abgr                97603869e6248a8e5d8501563a11b114
 argb                9e50e6ef02c83f28e97865a1f46ddfcd
+ayuv                3dfdbbbf605d83629f3ef4c96210b074
 ayuv64le            6f45f683e99ddf4180c7c7f47719efcc
 bgr0                590dcd1297d1dd4541eea217381db604
 bgr24               73afe7b447b083a7c2d682abe8dd451a
diff --git a/tests/ref/fate/filter-pixfmts-null b/tests/ref/fate/filter-pixfmts-null
index 120129dc1e..dc165b5ec5 100644
--- a/tests/ref/fate/filter-pixfmts-null
+++ b/tests/ref/fate/filter-pixfmts-null
@@ -2,6 +2,7 @@ 
 0rgb                527ef3d164c8fd0700493733959689c2
 abgr                023ecf6396d324edb113e4a483b79ba2
 argb                f003b555ef429222005d33844cca9325
+ayuv                eb7e43cfbb961d1e369311d0f58b9f52
 ayuv64le            07b9c969dfbe4add4c0626773b151d4f
 bgr0                6fcd67c8e6cec723dab21c70cf53dc16
 bgr24               4cff3814819f02ecf5824edfd768d2b1
diff --git a/tests/ref/fate/filter-pixfmts-pad b/tests/ref/fate/filter-pixfmts-pad
index abf4fceb46..5d8111b623 100644
--- a/tests/ref/fate/filter-pixfmts-pad
+++ b/tests/ref/fate/filter-pixfmts-pad
@@ -2,6 +2,7 @@ 
 0rgb                8e076dd0f8a9f4652595dffe3544f0f0
 abgr                52738042432893de555e6a3833172806
 argb                2a10108ac524b422b8a2393c064b3eab
+ayuv                ddb44f03af05b0cb01dced60641a4160
 bgr0                025d4d5e5691801ba39bc9de70e39df0
 bgr24               f8b65ad845905c7d0c93ca28dfbb826f
 bgr48le             4564b56dd1f2a9761b8a7a5244ac4c49
diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale
index 2052d3f2f0..41a6fe76e7 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -2,6 +2,7 @@ 
 0rgb                80a58af8c639743307207ab4b69ca863
 abgr                63f2eaa8712ea6108985f4a0b83587c9
 argb                f0e17c71a40643c33a5bcfb481f6d8f8
+ayuv                9992b829065e2ea4a44de3a25766ca9b
 ayuv64le            59fb016f9874062d0be77cb3920ffed2
 bgr0                243d58ca64f97b2f415b4c63cb79f0e1
 bgr24               18744aaab4b8bce065a7144dc0ccf921
diff --git a/tests/ref/fate/filter-pixfmts-transpose b/tests/ref/fate/filter-pixfmts-transpose
index 0a395f7e71..bcda0e6460 100644
--- a/tests/ref/fate/filter-pixfmts-transpose
+++ b/tests/ref/fate/filter-pixfmts-transpose
@@ -2,6 +2,7 @@ 
 0rgb                cf1bedd0784a3efd3ab00c4e44005c37
 abgr                6d6f896f853a6c6f93ee70dba9af3d17
 argb                87bbd23debb94d486ac3a6b6c0b005f9
+ayuv                fe3d4a2d4d37deb94e6f447edc77f199
 ayuv64le            e4c07e0d5b333b3bc9eb4f3ce6af3a2c
 bgr0                df3a6eedd4939ce09a357b655ac2962a
 bgr24               f9a08135e5d58c0b2a5509c369a88414
diff --git a/tests/ref/fate/filter-pixfmts-vflip b/tests/ref/fate/filter-pixfmts-vflip
index 5ba28917a7..bb8ad46704 100644
--- a/tests/ref/fate/filter-pixfmts-vflip
+++ b/tests/ref/fate/filter-pixfmts-vflip
@@ -2,6 +2,7 @@ 
 0rgb                76b792f8ce8a72925e04294dc2f25b36
 abgr                8b94f489e68802d76f1e2844688a4911
 argb                3fd6af7ef2364d8aa845d45db289a04a
+ayuv                928ac9b09b90dec1d8bc3ecd13b75a60
 ayuv64le            558671dd31d0754cfa6344eaf441df78
 bgr0                7117438cf000254610f23625265769b5
 bgr24               52b2c21cbc166978a38a646c354b6858