diff mbox series

[FFmpeg-devel,v1,3/6] swscale: Add explicit rgb24->yv12 conversion

Message ID 20230820151022.2204421-4-jc@kynesim.co.uk
State New
Headers show
Series swscale: Add dedicated RGB->YUV unscaled functions & aarch64 asm | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

John Cox Aug. 20, 2023, 3:10 p.m. UTC
Add a rgb24->yuv420p conversion. Uses the same code as the existing
bgr24->yuv converter but permutes the conversion array to swap R & B
coefficients.

Signed-off-by: John Cox <jc@kynesim.co.uk>
---
 libswscale/rgb2rgb.c          |  5 +++++
 libswscale/rgb2rgb.h          |  7 +++++++
 libswscale/rgb2rgb_template.c | 38 ++++++++++++++++++++++++++++++-----
 libswscale/swscale_unscaled.c | 24 +++++++++++++++++++++-
 4 files changed, 68 insertions(+), 6 deletions(-)

Comments

Michael Niedermayer Aug. 20, 2023, 5:16 p.m. UTC | #1
On Sun, Aug 20, 2023 at 03:10:19PM +0000, John Cox wrote:
> Add a rgb24->yuv420p conversion. Uses the same code as the existing
> bgr24->yuv converter but permutes the conversion array to swap R & B
> coefficients.
> 
> Signed-off-by: John Cox <jc@kynesim.co.uk>
> ---
>  libswscale/rgb2rgb.c          |  5 +++++
>  libswscale/rgb2rgb.h          |  7 +++++++
>  libswscale/rgb2rgb_template.c | 38 ++++++++++++++++++++++++++++++-----
>  libswscale/swscale_unscaled.c | 24 +++++++++++++++++++++-
>  4 files changed, 68 insertions(+), 6 deletions(-)
> 
> diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
> index 8707917800..de90e5193f 100644
> --- a/libswscale/rgb2rgb.c
> +++ b/libswscale/rgb2rgb.c
> @@ -83,6 +83,11 @@ void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
>                         int width, int height,
>                         int lumStride, int chromStride, int srcStride,
>                         int32_t *rgb2yuv);
> +void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
> +                       uint8_t *udst, uint8_t *vdst,
> +                       int width, int height,
> +                       int lumStride, int chromStride, int srcStride,
> +                       int32_t *rgb2yuv);
>  void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
>                   int srcStride, int dstStride);
>  void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
> diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
> index 305b830920..f7a76a92ba 100644
> --- a/libswscale/rgb2rgb.h
> +++ b/libswscale/rgb2rgb.h
> @@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
>  void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>                        uint8_t *vdst, int width, int height, int lumStride,
>                        int chromStride, int srcStride, int32_t *rgb2yuv);
> +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> +                      uint8_t *vdst, int width, int height, int lumStride,
> +                      int chromStride, int srcStride, int32_t *rgb2yuv);
>  
>  /**
>   * Height should be a multiple of 2 and width should be a multiple of 16.
> @@ -128,6 +131,10 @@ extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>                                int width, int height,
>                                int lumStride, int chromStride, int srcStride,
>                                int32_t *rgb2yuv);
> +extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
> +                              int width, int height,
> +                              int lumStride, int chromStride, int srcStride,
> +                              int32_t *rgb2yuv);
>  extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
>                          int srcStride, int dstStride);
>  
> diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
> index 8ef4a2cf5d..e57bfa6545 100644
> --- a/libswscale/rgb2rgb_template.c
> +++ b/libswscale/rgb2rgb_template.c


> @@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
>   * others are ignored in the C version.
>   * FIXME: Write HQ version.
>   */
> -void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> +static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,

this probably should be inline

also i see now "FIXME: Write HQ version." above here. Do you really want to
add a low quality rgb24toyv12 ?
(it is vissible on the diagonal border (cyan / red )) in
 ./ffmpeg -f lavfi -i testsrc=size=5632x3168 -pix_fmt yuv420p -vframes 1 -qscale 1 -strict -1 new.jpg

 also on smaller sizes but for some reason its clearer on the big one zoomed in 400% with gimp
(the gimp test was done with the whole patchset not after this patch)


[...]
> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
> index 32e0d7f63c..751bdcb2e4 100644
> --- a/libswscale/swscale_unscaled.c
> +++ b/libswscale/swscale_unscaled.c
> @@ -1654,6 +1654,23 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
>      return srcSliceH;
>  }
>  
> +static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
> +                              int srcStride[], int srcSliceY, int srcSliceH,
> +                              uint8_t *dst[], int dstStride[])
> +{
> +    ff_rgb24toyv12(
> +        src[0],
> +        dst[0] +  srcSliceY       * dstStride[0],
> +        dst[1] + (srcSliceY >> 1) * dstStride[1],
> +        dst[2] + (srcSliceY >> 1) * dstStride[2],
> +        c->srcW, srcSliceH,
> +        dstStride[0], dstStride[1], srcStride[0],
> +        c->input_rgb2yuv_table);
> +    if (dst[3])
> +        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
> +    return srcSliceH;
> +}
> +
>  static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
>                               int srcStride[], int srcSliceY, int srcSliceH,
>                               uint8_t *dst[], int dstStride[])

> @@ -2035,8 +2052,13 @@ void ff_get_unscaled_swscale(SwsContext *c)
>      /* bgr24toYV12 */
>      if (srcFormat == AV_PIX_FMT_BGR24 &&
>          (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
> -        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
> +        !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)) && !(dstW&1))

this doesnt belong in this patch

thx

[...]
Michael Niedermayer Aug. 20, 2023, 5:45 p.m. UTC | #2
On Sun, Aug 20, 2023 at 07:16:14PM +0200, Michael Niedermayer wrote:
> On Sun, Aug 20, 2023 at 03:10:19PM +0000, John Cox wrote:
> > Add a rgb24->yuv420p conversion. Uses the same code as the existing
> > bgr24->yuv converter but permutes the conversion array to swap R & B
> > coefficients.
> > 
> > Signed-off-by: John Cox <jc@kynesim.co.uk>
> > ---
> >  libswscale/rgb2rgb.c          |  5 +++++
> >  libswscale/rgb2rgb.h          |  7 +++++++
> >  libswscale/rgb2rgb_template.c | 38 ++++++++++++++++++++++++++++++-----
> >  libswscale/swscale_unscaled.c | 24 +++++++++++++++++++++-
> >  4 files changed, 68 insertions(+), 6 deletions(-)
> > 
> > diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
> > index 8707917800..de90e5193f 100644
> > --- a/libswscale/rgb2rgb.c
> > +++ b/libswscale/rgb2rgb.c
> > @@ -83,6 +83,11 @@ void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
> >                         int width, int height,
> >                         int lumStride, int chromStride, int srcStride,
> >                         int32_t *rgb2yuv);
> > +void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
> > +                       uint8_t *udst, uint8_t *vdst,
> > +                       int width, int height,
> > +                       int lumStride, int chromStride, int srcStride,
> > +                       int32_t *rgb2yuv);
> >  void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
> >                   int srcStride, int dstStride);
> >  void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
> > diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
> > index 305b830920..f7a76a92ba 100644
> > --- a/libswscale/rgb2rgb.h
> > +++ b/libswscale/rgb2rgb.h
> > @@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
> >  void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> >                        uint8_t *vdst, int width, int height, int lumStride,
> >                        int chromStride, int srcStride, int32_t *rgb2yuv);
> > +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> > +                      uint8_t *vdst, int width, int height, int lumStride,
> > +                      int chromStride, int srcStride, int32_t *rgb2yuv);
> >  
> >  /**
> >   * Height should be a multiple of 2 and width should be a multiple of 16.
> > @@ -128,6 +131,10 @@ extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> >                                int width, int height,
> >                                int lumStride, int chromStride, int srcStride,
> >                                int32_t *rgb2yuv);
> > +extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
> > +                              int width, int height,
> > +                              int lumStride, int chromStride, int srcStride,
> > +                              int32_t *rgb2yuv);
> >  extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
> >                          int srcStride, int dstStride);
> >  
> > diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
> > index 8ef4a2cf5d..e57bfa6545 100644
> > --- a/libswscale/rgb2rgb_template.c
> > +++ b/libswscale/rgb2rgb_template.c
> 
> 
> > @@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
> >   * others are ignored in the C version.
> >   * FIXME: Write HQ version.
> >   */
> > -void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> > +static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> 
> this probably should be inline
> 
> also i see now "FIXME: Write HQ version." above here. Do you really want to
> add a low quality rgb24toyv12 ?
> (it is vissible on the diagonal border (cyan / red )) in
>  ./ffmpeg -f lavfi -i testsrc=size=5632x3168 -pix_fmt yuv420p -vframes 1 -qscale 1 -strict -1 new.jpg
> 
>  also on smaller sizes but for some reason its clearer on the big one zoomed in 400% with gimp
> (the gimp test was done with the whole patchset not after this patch)

Also the reason why its LQ and looks like it does is because
1. half the RGB samples are ignored in computing the chroma samples
2. the chroma sample locations are ignored, the locations for yuv420 are reaonable standard

this needs some simple filter to get from a few RGB samples to the RGB sample co-located
with ths UV sample before RGB->UV

thx

[...]
John Cox Aug. 20, 2023, 6:09 p.m. UTC | #3
On Sun, 20 Aug 2023 19:16:14 +0200, you wrote:

>On Sun, Aug 20, 2023 at 03:10:19PM +0000, John Cox wrote:
>> Add a rgb24->yuv420p conversion. Uses the same code as the existing
>> bgr24->yuv converter but permutes the conversion array to swap R & B
>> coefficients.
>> 
>> Signed-off-by: John Cox <jc@kynesim.co.uk>
>> ---
>>  libswscale/rgb2rgb.c          |  5 +++++
>>  libswscale/rgb2rgb.h          |  7 +++++++
>>  libswscale/rgb2rgb_template.c | 38 ++++++++++++++++++++++++++++++-----
>>  libswscale/swscale_unscaled.c | 24 +++++++++++++++++++++-
>>  4 files changed, 68 insertions(+), 6 deletions(-)
>> 
>> diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
>> index 8707917800..de90e5193f 100644
>> --- a/libswscale/rgb2rgb.c
>> +++ b/libswscale/rgb2rgb.c
>> @@ -83,6 +83,11 @@ void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
>>                         int width, int height,
>>                         int lumStride, int chromStride, int srcStride,
>>                         int32_t *rgb2yuv);
>> +void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
>> +                       uint8_t *udst, uint8_t *vdst,
>> +                       int width, int height,
>> +                       int lumStride, int chromStride, int srcStride,
>> +                       int32_t *rgb2yuv);
>>  void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
>>                   int srcStride, int dstStride);
>>  void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
>> diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
>> index 305b830920..f7a76a92ba 100644
>> --- a/libswscale/rgb2rgb.h
>> +++ b/libswscale/rgb2rgb.h
>> @@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
>>  void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>>                        uint8_t *vdst, int width, int height, int lumStride,
>>                        int chromStride, int srcStride, int32_t *rgb2yuv);
>> +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> +                      uint8_t *vdst, int width, int height, int lumStride,
>> +                      int chromStride, int srcStride, int32_t *rgb2yuv);
>>  
>>  /**
>>   * Height should be a multiple of 2 and width should be a multiple of 16.
>> @@ -128,6 +131,10 @@ extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>>                                int width, int height,
>>                                int lumStride, int chromStride, int srcStride,
>>                                int32_t *rgb2yuv);
>> +extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
>> +                              int width, int height,
>> +                              int lumStride, int chromStride, int srcStride,
>> +                              int32_t *rgb2yuv);
>>  extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
>>                          int srcStride, int dstStride);
>>  
>> diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
>> index 8ef4a2cf5d..e57bfa6545 100644
>> --- a/libswscale/rgb2rgb_template.c
>> +++ b/libswscale/rgb2rgb_template.c
>
>
>> @@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
>>   * others are ignored in the C version.
>>   * FIXME: Write HQ version.
>>   */
>> -void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> +static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>
>this probably should be inline

Could do, and I will if you deem it important, but the only bit that
inline is going to help is the matrix coefficient loading and that
happens once outside the main loops.

>also i see now "FIXME: Write HQ version." above here. Do you really want to
>add a low quality rgb24toyv12 ?
>(it is vissible on the diagonal border (cyan / red )) in
> ./ffmpeg -f lavfi -i testsrc=size=5632x3168 -pix_fmt yuv420p -vframes 1 -qscale 1 -strict -1 new.jpg
>
> also on smaller sizes but for some reason its clearer on the big one zoomed in 400% with gimp
>(the gimp test was done with the whole patchset not after this patch)

On the whole - yes - in the encode path on the Pi that I'm writing this
for speed is more important than quality - the existing path is too slow
to be usable. And honestly - using your example above comparing (Windows
photo viewer zoomed in s.t. pixels are clearly individually visible) the
general (bitexact), presumably HQ, output vs the new code I grant that
the new is slightly muckier but not by a huge amount - sharp chroma
transitions in 420 are always nasty.

>[...]
>> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
>> index 32e0d7f63c..751bdcb2e4 100644
>> --- a/libswscale/swscale_unscaled.c
>> +++ b/libswscale/swscale_unscaled.c
>> @@ -1654,6 +1654,23 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
>>      return srcSliceH;
>>  }
>>  
>> +static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
>> +                              int srcStride[], int srcSliceY, int srcSliceH,
>> +                              uint8_t *dst[], int dstStride[])
>> +{
>> +    ff_rgb24toyv12(
>> +        src[0],
>> +        dst[0] +  srcSliceY       * dstStride[0],
>> +        dst[1] + (srcSliceY >> 1) * dstStride[1],
>> +        dst[2] + (srcSliceY >> 1) * dstStride[2],
>> +        c->srcW, srcSliceH,
>> +        dstStride[0], dstStride[1], srcStride[0],
>> +        c->input_rgb2yuv_table);
>> +    if (dst[3])
>> +        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
>> +    return srcSliceH;
>> +}
>> +
>>  static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
>>                               int srcStride[], int srcSliceY, int srcSliceH,
>>                               uint8_t *dst[], int dstStride[])
>
>> @@ -2035,8 +2052,13 @@ void ff_get_unscaled_swscale(SwsContext *c)
>>      /* bgr24toYV12 */
>>      if (srcFormat == AV_PIX_FMT_BGR24 &&
>>          (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
>> -        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
>> +        !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)) && !(dstW&1))
>
>this doesnt belong in this patch

So should it go in its own patch, or attached to some other patch?

Ta
John Cox Aug. 20, 2023, 6:28 p.m. UTC | #4
On Sun, 20 Aug 2023 19:45:11 +0200, you wrote:

>On Sun, Aug 20, 2023 at 07:16:14PM +0200, Michael Niedermayer wrote:
>> On Sun, Aug 20, 2023 at 03:10:19PM +0000, John Cox wrote:
>> > Add a rgb24->yuv420p conversion. Uses the same code as the existing
>> > bgr24->yuv converter but permutes the conversion array to swap R & B
>> > coefficients.
>> > 
>> > Signed-off-by: John Cox <jc@kynesim.co.uk>
>> > ---
>> >  libswscale/rgb2rgb.c          |  5 +++++
>> >  libswscale/rgb2rgb.h          |  7 +++++++
>> >  libswscale/rgb2rgb_template.c | 38 ++++++++++++++++++++++++++++++-----
>> >  libswscale/swscale_unscaled.c | 24 +++++++++++++++++++++-
>> >  4 files changed, 68 insertions(+), 6 deletions(-)
>> > 
>> > diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
>> > index 8707917800..de90e5193f 100644
>> > --- a/libswscale/rgb2rgb.c
>> > +++ b/libswscale/rgb2rgb.c
>> > @@ -83,6 +83,11 @@ void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
>> >                         int width, int height,
>> >                         int lumStride, int chromStride, int srcStride,
>> >                         int32_t *rgb2yuv);
>> > +void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
>> > +                       uint8_t *udst, uint8_t *vdst,
>> > +                       int width, int height,
>> > +                       int lumStride, int chromStride, int srcStride,
>> > +                       int32_t *rgb2yuv);
>> >  void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
>> >                   int srcStride, int dstStride);
>> >  void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
>> > diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
>> > index 305b830920..f7a76a92ba 100644
>> > --- a/libswscale/rgb2rgb.h
>> > +++ b/libswscale/rgb2rgb.h
>> > @@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
>> >  void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> >                        uint8_t *vdst, int width, int height, int lumStride,
>> >                        int chromStride, int srcStride, int32_t *rgb2yuv);
>> > +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> > +                      uint8_t *vdst, int width, int height, int lumStride,
>> > +                      int chromStride, int srcStride, int32_t *rgb2yuv);
>> >  
>> >  /**
>> >   * Height should be a multiple of 2 and width should be a multiple of 16.
>> > @@ -128,6 +131,10 @@ extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> >                                int width, int height,
>> >                                int lumStride, int chromStride, int srcStride,
>> >                                int32_t *rgb2yuv);
>> > +extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
>> > +                              int width, int height,
>> > +                              int lumStride, int chromStride, int srcStride,
>> > +                              int32_t *rgb2yuv);
>> >  extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
>> >                          int srcStride, int dstStride);
>> >  
>> > diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
>> > index 8ef4a2cf5d..e57bfa6545 100644
>> > --- a/libswscale/rgb2rgb_template.c
>> > +++ b/libswscale/rgb2rgb_template.c
>> 
>> 
>> > @@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
>> >   * others are ignored in the C version.
>> >   * FIXME: Write HQ version.
>> >   */
>> > -void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> > +static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> 
>> this probably should be inline
>> 
>> also i see now "FIXME: Write HQ version." above here. Do you really want to
>> add a low quality rgb24toyv12 ?
>> (it is vissible on the diagonal border (cyan / red )) in
>>  ./ffmpeg -f lavfi -i testsrc=size=5632x3168 -pix_fmt yuv420p -vframes 1 -qscale 1 -strict -1 new.jpg
>> 
>>  also on smaller sizes but for some reason its clearer on the big one zoomed in 400% with gimp
>> (the gimp test was done with the whole patchset not after this patch)
>
>Also the reason why its LQ and looks like it does is because
>1. half the RGB samples are ignored in computing the chroma samples

I thought it was a bit light but it is what the existing code did

>2. the chroma sample locations are ignored, the locations for yuv420 are reaonable standard

As I recall MPEG-1 has chroma at (0.5, 0.5), MPEG-II defaults to (0.5,
0), H.265 defaults to (0,0). Printing out dst_h_chr_pos, dst_v_chr_pos
in the setup of your example yields -513, 128 which I'm guessing means
(unset, 0.5) - am I looking at the correct vars?

>this needs some simple filter to get from a few RGB samples to the RGB sample co-located
>with ths UV sample before RGB->UV

I can get to simple bilinear without adding so much complexity that I
lose the speed I need - would that be OK?

Ta

>thx
>
>[...]
Michael Niedermayer Aug. 21, 2023, 7:15 p.m. UTC | #5
On Sun, Aug 20, 2023 at 07:28:40PM +0100, John Cox wrote:
> On Sun, 20 Aug 2023 19:45:11 +0200, you wrote:
> 
> >On Sun, Aug 20, 2023 at 07:16:14PM +0200, Michael Niedermayer wrote:
> >> On Sun, Aug 20, 2023 at 03:10:19PM +0000, John Cox wrote:
> >> > Add a rgb24->yuv420p conversion. Uses the same code as the existing
> >> > bgr24->yuv converter but permutes the conversion array to swap R & B
> >> > coefficients.
> >> > 
> >> > Signed-off-by: John Cox <jc@kynesim.co.uk>
> >> > ---
> >> >  libswscale/rgb2rgb.c          |  5 +++++
> >> >  libswscale/rgb2rgb.h          |  7 +++++++
> >> >  libswscale/rgb2rgb_template.c | 38 ++++++++++++++++++++++++++++++-----
> >> >  libswscale/swscale_unscaled.c | 24 +++++++++++++++++++++-
> >> >  4 files changed, 68 insertions(+), 6 deletions(-)
> >> > 
> >> > diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
> >> > index 8707917800..de90e5193f 100644
> >> > --- a/libswscale/rgb2rgb.c
> >> > +++ b/libswscale/rgb2rgb.c
> >> > @@ -83,6 +83,11 @@ void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
> >> >                         int width, int height,
> >> >                         int lumStride, int chromStride, int srcStride,
> >> >                         int32_t *rgb2yuv);
> >> > +void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
> >> > +                       uint8_t *udst, uint8_t *vdst,
> >> > +                       int width, int height,
> >> > +                       int lumStride, int chromStride, int srcStride,
> >> > +                       int32_t *rgb2yuv);
> >> >  void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
> >> >                   int srcStride, int dstStride);
> >> >  void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
> >> > diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
> >> > index 305b830920..f7a76a92ba 100644
> >> > --- a/libswscale/rgb2rgb.h
> >> > +++ b/libswscale/rgb2rgb.h
> >> > @@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
> >> >  void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> >> >                        uint8_t *vdst, int width, int height, int lumStride,
> >> >                        int chromStride, int srcStride, int32_t *rgb2yuv);
> >> > +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> >> > +                      uint8_t *vdst, int width, int height, int lumStride,
> >> > +                      int chromStride, int srcStride, int32_t *rgb2yuv);
> >> >  
> >> >  /**
> >> >   * Height should be a multiple of 2 and width should be a multiple of 16.
> >> > @@ -128,6 +131,10 @@ extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> >> >                                int width, int height,
> >> >                                int lumStride, int chromStride, int srcStride,
> >> >                                int32_t *rgb2yuv);
> >> > +extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
> >> > +                              int width, int height,
> >> > +                              int lumStride, int chromStride, int srcStride,
> >> > +                              int32_t *rgb2yuv);
> >> >  extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
> >> >                          int srcStride, int dstStride);
> >> >  
> >> > diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
> >> > index 8ef4a2cf5d..e57bfa6545 100644
> >> > --- a/libswscale/rgb2rgb_template.c
> >> > +++ b/libswscale/rgb2rgb_template.c
> >> 
> >> 
> >> > @@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
> >> >   * others are ignored in the C version.
> >> >   * FIXME: Write HQ version.
> >> >   */
> >> > -void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> >> > +static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
> >> 
> >> this probably should be inline
> >> 
> >> also i see now "FIXME: Write HQ version." above here. Do you really want to
> >> add a low quality rgb24toyv12 ?
> >> (it is vissible on the diagonal border (cyan / red )) in
> >>  ./ffmpeg -f lavfi -i testsrc=size=5632x3168 -pix_fmt yuv420p -vframes 1 -qscale 1 -strict -1 new.jpg
> >> 
> >>  also on smaller sizes but for some reason its clearer on the big one zoomed in 400% with gimp
> >> (the gimp test was done with the whole patchset not after this patch)
> >
> >Also the reason why its LQ and looks like it does is because
> >1. half the RGB samples are ignored in computing the chroma samples
> 
> I thought it was a bit light but it is what the existing code did
> 
> >2. the chroma sample locations are ignored, the locations for yuv420 are reaonable standard
> 
> As I recall MPEG-1 has chroma at (0.5, 0.5), MPEG-II defaults to (0.5,
> 0),

yes


> H.265 defaults to (0,0).

hmm
    When the value of chroma_format_idc is equal to 1, the nominal vertical and horizontal relative locations of luma and
    chroma samples in pictures are shown in Figure 6-1. Alternative chroma sample relative locations may be indicated in
    video usability information (see Annex E).

    X  X  X  X  X  X
    O     O     O    ...
    X  X  X  X  X  X

    X  X  X  X  X  X
    O     O     O
    X  X  X  X  X  X

    X  X  X  X  X  X
    O     O     O
    X  X  X  X  X  X
    .                .
    :                 ´.
    X Location of luma sample
    O Location of chroma sample

    Figure 6-1 – Nominal vertical and horizontal locations of 4:2:0 luma and chroma samples in a picture



> Printing out dst_h_chr_pos, dst_v_chr_pos
> in the setup of your example yields -513, 128 which I'm guessing means
> (unset, 0.5) - am I looking at the correct vars?
> 
> >this needs some simple filter to get from a few RGB samples to the RGB sample co-located
> >with ths UV sample before RGB->UV
> 

> I can get to simple bilinear without adding so much complexity that I
> lose the speed I need - would that be OK?

Not sure simple bilinear is 100% clearly defined
I think it could mean 3 things

1 2 1
  C
1 2 1

or

  1
  C
  1

  or

1 2 1

3 6 3
  C
3 6 3

1 2 1

I think the 6 and 12 tap cases would produce ok results teh 2 tap not
Also maybe there are more finetuned filters for this specific case, i dont
know / didnt look.
Testing these probably would not be a bad idea before implementation

I think users in 2023 expect the default to be better than what the
existing code was doing by default
so feel free to replace the existing "identical" code too

[...]

thx
John Cox Aug. 22, 2023, 2:24 p.m. UTC | #6
On Mon, 21 Aug 2023 21:15:37 +0200, you wrote:

>On Sun, Aug 20, 2023 at 07:28:40PM +0100, John Cox wrote:
>> On Sun, 20 Aug 2023 19:45:11 +0200, you wrote:
>> 
>> >On Sun, Aug 20, 2023 at 07:16:14PM +0200, Michael Niedermayer wrote:
>> >> On Sun, Aug 20, 2023 at 03:10:19PM +0000, John Cox wrote:
>> >> > Add a rgb24->yuv420p conversion. Uses the same code as the existing
>> >> > bgr24->yuv converter but permutes the conversion array to swap R & B
>> >> > coefficients.
>> >> > 
>> >> > Signed-off-by: John Cox <jc@kynesim.co.uk>
>> >> > ---
>> >> >  libswscale/rgb2rgb.c          |  5 +++++
>> >> >  libswscale/rgb2rgb.h          |  7 +++++++
>> >> >  libswscale/rgb2rgb_template.c | 38 ++++++++++++++++++++++++++++++-----
>> >> >  libswscale/swscale_unscaled.c | 24 +++++++++++++++++++++-
>> >> >  4 files changed, 68 insertions(+), 6 deletions(-)
>> >> > 
>> >> > diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
>> >> > index 8707917800..de90e5193f 100644
>> >> > --- a/libswscale/rgb2rgb.c
>> >> > +++ b/libswscale/rgb2rgb.c
>> >> > @@ -83,6 +83,11 @@ void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
>> >> >                         int width, int height,
>> >> >                         int lumStride, int chromStride, int srcStride,
>> >> >                         int32_t *rgb2yuv);
>> >> > +void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
>> >> > +                       uint8_t *udst, uint8_t *vdst,
>> >> > +                       int width, int height,
>> >> > +                       int lumStride, int chromStride, int srcStride,
>> >> > +                       int32_t *rgb2yuv);
>> >> >  void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
>> >> >                   int srcStride, int dstStride);
>> >> >  void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
>> >> > diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
>> >> > index 305b830920..f7a76a92ba 100644
>> >> > --- a/libswscale/rgb2rgb.h
>> >> > +++ b/libswscale/rgb2rgb.h
>> >> > @@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
>> >> >  void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> >> >                        uint8_t *vdst, int width, int height, int lumStride,
>> >> >                        int chromStride, int srcStride, int32_t *rgb2yuv);
>> >> > +void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> >> > +                      uint8_t *vdst, int width, int height, int lumStride,
>> >> > +                      int chromStride, int srcStride, int32_t *rgb2yuv);
>> >> >  
>> >> >  /**
>> >> >   * Height should be a multiple of 2 and width should be a multiple of 16.
>> >> > @@ -128,6 +131,10 @@ extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> >> >                                int width, int height,
>> >> >                                int lumStride, int chromStride, int srcStride,
>> >> >                                int32_t *rgb2yuv);
>> >> > +extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
>> >> > +                              int width, int height,
>> >> > +                              int lumStride, int chromStride, int srcStride,
>> >> > +                              int32_t *rgb2yuv);
>> >> >  extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
>> >> >                          int srcStride, int dstStride);
>> >> >  
>> >> > diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
>> >> > index 8ef4a2cf5d..e57bfa6545 100644
>> >> > --- a/libswscale/rgb2rgb_template.c
>> >> > +++ b/libswscale/rgb2rgb_template.c
>> >> 
>> >> 
>> >> > @@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
>> >> >   * others are ignored in the C version.
>> >> >   * FIXME: Write HQ version.
>> >> >   */
>> >> > -void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> >> > +static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
>> >> 
>> >> this probably should be inline
>> >> 
>> >> also i see now "FIXME: Write HQ version." above here. Do you really want to
>> >> add a low quality rgb24toyv12 ?
>> >> (it is vissible on the diagonal border (cyan / red )) in
>> >>  ./ffmpeg -f lavfi -i testsrc=size=5632x3168 -pix_fmt yuv420p -vframes 1 -qscale 1 -strict -1 new.jpg
>> >> 
>> >>  also on smaller sizes but for some reason its clearer on the big one zoomed in 400% with gimp
>> >> (the gimp test was done with the whole patchset not after this patch)
>> >
>> >Also the reason why its LQ and looks like it does is because
>> >1. half the RGB samples are ignored in computing the chroma samples
>> 
>> I thought it was a bit light but it is what the existing code did
>> 
>> >2. the chroma sample locations are ignored, the locations for yuv420 are reaonable standard
>> 
>> As I recall MPEG-1 has chroma at (0.5, 0.5), MPEG-II defaults to (0.5,
>> 0),
>
>yes
>
>
>> H.265 defaults to (0,0).
>
>hmm
>    When the value of chroma_format_idc is equal to 1, the nominal vertical and horizontal relative locations of luma and
>    chroma samples in pictures are shown in Figure 6-1. Alternative chroma sample relative locations may be indicated in
>    video usability information (see Annex E).
>
>    X  X  X  X  X  X
>    O     O     O    ...
>    X  X  X  X  X  X
>
>    X  X  X  X  X  X
>    O     O     O
>    X  X  X  X  X  X
>
>    X  X  X  X  X  X
>    O     O     O
>    X  X  X  X  X  X
>    .                .
>    :                 ´.
>    X Location of luma sample
>    O Location of chroma sample
>
>    Figure 6-1 – Nominal vertical and horizontal locations of 4:2:0 luma and chroma samples in a picture

You are right - I was remembering the special case for BT2020 ("When
chroma_format_idc is equal to 1 (4:2:0 chroma format) and the decoded
video content is intended for interpretation according to Rec. ITU-R
BT.2020-2 or Rec. ITU-R BT.2100-2, chroma_loc_info_present_flag should
be equal to 1, and chroma_sample_loc_type_top_field and
chroma_sample_loc_type_bottom_field should both be equal to 2")

>> Printing out dst_h_chr_pos, dst_v_chr_pos
>> in the setup of your example yields -513, 128 which I'm guessing means
>> (unset, 0.5) - am I looking at the correct vars?
>> 
>> >this needs some simple filter to get from a few RGB samples to the RGB sample co-located
>> >with ths UV sample before RGB->UV
>> 
>
>> I can get to simple bilinear without adding so much complexity that I
>> lose the speed I need - would that be OK?
>
>Not sure simple bilinear is 100% clearly defined
>I think it could mean 3 things
>
>1 2 1
>  C
>1 2 1
>
>or
>
>  1
>  C
>  1
>
>  or
>
>1 2 1
>
>3 6 3
>  C
>3 6 3
>
>1 2 1
>
>I think the 6 and 12 tap cases would produce ok results teh 2 tap not
>Also maybe there are more finetuned filters for this specific case, i dont
>know / didnt look.
>Testing these probably would not be a bad idea before implementation
>
>I think users in 2023 expect the default to be better than what the
>existing code was doing by default
>so feel free to replace the existing "identical" code too

I was thinking of 2-tap (in both X & Y) which is equivalent to
SWS_FAST_BILINEAR in ffmpeg. In the case I'm looking at I need the speed
more than I need the quality and I'm quite happy to gate them behind a
test for SWS_FAST_BILINEAR.

As an aside, with SWS_FAST_BILINEAR (and probably the other methods) in
ffmpeg you need flags=out_v_chr_pos=0:out_h_chr_pos=128 to land the YUV
chroma sample on the top-left RGB sample - that confused me for a while
whilst I was trying to work out what ffmpeg actually does!

Regards

JC

>[...]
>
>thx
Michael Niedermayer Aug. 22, 2023, 6:03 p.m. UTC | #7
On Tue, Aug 22, 2023 at 03:24:17PM +0100, John Cox wrote:
> On Mon, 21 Aug 2023 21:15:37 +0200, you wrote:
[...]
> >> I can get to simple bilinear without adding so much complexity that I
> >> lose the speed I need - would that be OK?
> >
> >Not sure simple bilinear is 100% clearly defined
> >I think it could mean 3 things
> >
> >1 2 1
> >  C
> >1 2 1
> >
> >or
> >
> >  1
> >  C
> >  1
> >
> >  or
> >
> >1 2 1
> >
> >3 6 3
> >  C
> >3 6 3
> >
> >1 2 1
> >
> >I think the 6 and 12 tap cases would produce ok results teh 2 tap not
> >Also maybe there are more finetuned filters for this specific case, i dont
> >know / didnt look.
> >Testing these probably would not be a bad idea before implementation
> >
> >I think users in 2023 expect the default to be better than what the
> >existing code was doing by default
> >so feel free to replace the existing "identical" code too
> 
> I was thinking of 2-tap (in both X & Y) which is equivalent to
> SWS_FAST_BILINEAR in ffmpeg. In the case I'm looking at I need the speed
> more than I need the quality and I'm quite happy to gate them behind a
> test for SWS_FAST_BILINEAR.

ok but maybe you want to still fix/add the higher quality C version too?
it would be almost the same code, just a different mix of source samples
iam asking as you are already working on this

thx

[...]
diff mbox series

Patch

diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 8707917800..de90e5193f 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -83,6 +83,11 @@  void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
                        int width, int height,
                        int lumStride, int chromStride, int srcStride,
                        int32_t *rgb2yuv);
+void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
+                       uint8_t *udst, uint8_t *vdst,
+                       int width, int height,
+                       int lumStride, int chromStride, int srcStride,
+                       int32_t *rgb2yuv);
 void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
                  int srcStride, int dstStride);
 void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 305b830920..f7a76a92ba 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -79,6 +79,9 @@  void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
                       uint8_t *vdst, int width, int height, int lumStride,
                       int chromStride, int srcStride, int32_t *rgb2yuv);
+void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                      uint8_t *vdst, int width, int height, int lumStride,
+                      int chromStride, int srcStride, int32_t *rgb2yuv);
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
@@ -128,6 +131,10 @@  extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
                               int width, int height,
                               int lumStride, int chromStride, int srcStride,
                               int32_t *rgb2yuv);
+extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                              int width, int height,
+                              int lumStride, int chromStride, int srcStride,
+                              int32_t *rgb2yuv);
 extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
                         int srcStride, int dstStride);
 
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 8ef4a2cf5d..e57bfa6545 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -646,13 +646,14 @@  static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
  * others are ignored in the C version.
  * FIXME: Write HQ version.
  */
-void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
                    uint8_t *vdst, int width, int height, int lumStride,
-                   int chromStride, int srcStride, int32_t *rgb2yuv)
+                   int chromStride, int srcStride, int32_t *rgb2yuv,
+                   const uint8_t x[9])
 {
-    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
-    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
-    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+    int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
+    int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
+    int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
     int y;
     const int chromWidth = width >> 1;
 
@@ -707,6 +708,32 @@  void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
     }
 }
 
+static const uint8_t x_bgr[9] = {
+    RY_IDX, GY_IDX, BY_IDX,
+    RU_IDX, GU_IDX, BU_IDX,
+    RV_IDX, GV_IDX, BV_IDX,
+};
+
+static const uint8_t x_rgb[9] = {
+     BY_IDX, GY_IDX, RY_IDX,
+     BU_IDX, GU_IDX, RU_IDX,
+     BV_IDX, GV_IDX, RV_IDX,
+};
+
+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride, int32_t *rgb2yuv)
+{
+    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
+}
+
+void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride, int32_t *rgb2yuv)
+{
+    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
+}
+
 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                               uint8_t *dest, int width, int height,
                               int src1Stride, int src2Stride, int dstStride)
@@ -979,6 +1006,7 @@  static av_cold void rgb2rgb_init_c(void)
     yuv422ptouyvy      = yuv422ptouyvy_c;
     yuy2toyv12         = yuy2toyv12_c;
     planar2x           = planar2x_c;
+    ff_rgb24toyv12     = ff_rgb24toyv12_c;
     ff_bgr24toyv12     = ff_bgr24toyv12_c;
     interleaveBytes    = interleaveBytes_c;
     deinterleaveBytes  = deinterleaveBytes_c;
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 32e0d7f63c..751bdcb2e4 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1654,6 +1654,23 @@  static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
+static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
+                              int srcStride[], int srcSliceY, int srcSliceH,
+                              uint8_t *dst[], int dstStride[])
+{
+    ff_rgb24toyv12(
+        src[0],
+        dst[0] +  srcSliceY       * dstStride[0],
+        dst[1] + (srcSliceY >> 1) * dstStride[1],
+        dst[2] + (srcSliceY >> 1) * dstStride[2],
+        c->srcW, srcSliceH,
+        dstStride[0], dstStride[1], srcStride[0],
+        c->input_rgb2yuv_table);
+    if (dst[3])
+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+    return srcSliceH;
+}
+
 static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
                              int srcStride[], int srcSliceY, int srcSliceH,
                              uint8_t *dst[], int dstStride[])
@@ -2035,8 +2052,13 @@  void ff_get_unscaled_swscale(SwsContext *c)
     /* bgr24toYV12 */
     if (srcFormat == AV_PIX_FMT_BGR24 &&
         (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
-        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
+        !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)) && !(dstW&1))
         c->convert_unscaled = bgr24ToYv12Wrapper;
+    /* rgb24toYV12 */
+    if (srcFormat == AV_PIX_FMT_RGB24 &&
+        (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
+        !(flags & (SWS_ACCURATE_RND | SWS_BITEXACT)) && !(dstW&1))
+        c->convert_unscaled = rgb24ToYv12Wrapper;
 
     /* RGB/BGR -> RGB/BGR (no dither needed forms) */
     if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c)