diff mbox

[FFmpeg-devel,4/7] Adds gray floating-point pixel formats.

Message ID 20180802185248.18168-5-dualfal@gmail.com
State Superseded
Headers show

Commit Message

Sergey Lavrushkin Aug. 2, 2018, 6:52 p.m. UTC
This patch adds two floating-point gray formats to use them in sr filter for 
conversion with libswscale. I added conversion from uint gray to float and
backwards in swscale_unscaled.c, that is enough for sr filter. But for
proper format addition, should I add anything else?

---
 libavutil/pixdesc.c           | 22 ++++++++++++++++++
 libavutil/pixfmt.h            |  5 ++++
 libswscale/swscale_internal.h |  7 ++++++
 libswscale/swscale_unscaled.c | 54 +++++++++++++++++++++++++++++++++++++++++--
 libswscale/utils.c            |  5 +++-
 5 files changed, 90 insertions(+), 3 deletions(-)

Comments

Martin Vignali Aug. 2, 2018, 7:22 p.m. UTC | #1
>
> +static int uint_y_to_float_y_wrapper(SwsContext *c, const uint8_t *src[],
> +                                     int srcStride[], int srcSliceY,
> +                                     int srcSliceH, uint8_t *dst[], int
> dstStride[])
> +{
> +    int y, x;
> +    int dstStrideFloat = dstStride[0] >> 2;;
> +    const uint8_t *srcPtr = src[0];
> +    float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY);
> +
> +    for (y = 0; y < srcSliceH; ++y){
> +        for (x = 0; x < c->srcW; ++x){
> +            dstPtr[x] = (float)srcPtr[x] / 255.0f;
> +        }
> +        srcPtr += srcStride[0];
> +        dstPtr += dstStrideFloat;
> +    }
> +
> +    return srcSliceH;
> +}
> +
> +static int float_y_to_uint_y_wrapper(SwsContext *c, const uint8_t* src[],
> +                                     int srcStride[], int srcSliceY,
> +                                     int srcSliceH, uint8_t* dst[], int
> dstStride[])
> +{
> +    int y, x;
> +    int srcStrideFloat = srcStride[0] >> 2;
> +    const float *srcPtr = (const float *)src[0];
> +    uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY;
> +
> +    for (y = 0; y < srcSliceH; ++y){
> +        for (x = 0; x < c->srcW; ++x){
> +            dstPtr[x] = (uint8_t)(255.0f * FFMIN(FFMAX(srcPtr[x], 0.0f),
> 1.0f));
> +        }
> +        srcPtr += srcStrideFloat;
> +        dstPtr += dstStride[0];
> +    }
> +
> +    return srcSliceH;
> +}
>


Maybe you can avoid to use float for these conversions
like in libavcodec/exr.c, where there is a float to uint16 conversion

Martin
James Almer Aug. 2, 2018, 7:32 p.m. UTC | #2
On 8/2/2018 4:22 PM, Martin Vignali wrote:
>>
>> +static int uint_y_to_float_y_wrapper(SwsContext *c, const uint8_t *src[],
>> +                                     int srcStride[], int srcSliceY,
>> +                                     int srcSliceH, uint8_t *dst[], int
>> dstStride[])
>> +{
>> +    int y, x;
>> +    int dstStrideFloat = dstStride[0] >> 2;;
>> +    const uint8_t *srcPtr = src[0];
>> +    float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY);
>> +
>> +    for (y = 0; y < srcSliceH; ++y){
>> +        for (x = 0; x < c->srcW; ++x){
>> +            dstPtr[x] = (float)srcPtr[x] / 255.0f;
>> +        }
>> +        srcPtr += srcStride[0];
>> +        dstPtr += dstStrideFloat;
>> +    }
>> +
>> +    return srcSliceH;
>> +}
>> +
>> +static int float_y_to_uint_y_wrapper(SwsContext *c, const uint8_t* src[],
>> +                                     int srcStride[], int srcSliceY,
>> +                                     int srcSliceH, uint8_t* dst[], int
>> dstStride[])
>> +{
>> +    int y, x;
>> +    int srcStrideFloat = srcStride[0] >> 2;
>> +    const float *srcPtr = (const float *)src[0];
>> +    uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY;
>> +
>> +    for (y = 0; y < srcSliceH; ++y){
>> +        for (x = 0; x < c->srcW; ++x){
>> +            dstPtr[x] = (uint8_t)(255.0f * FFMIN(FFMAX(srcPtr[x], 0.0f),
>> 1.0f));
>> +        }
>> +        srcPtr += srcStrideFloat;
>> +        dstPtr += dstStride[0];
>> +    }
>> +
>> +    return srcSliceH;
>> +}
>>
> 
> 
> Maybe you can avoid to use float for these conversions
> like in libavcodec/exr.c, where there is a float to uint16 conversion
> 
> Martin

Unless i'm missing something, I think the point is that he was asked to
not do format conversion within the filter itself.
Carl Eugen Hoyos Aug. 2, 2018, 11:56 p.m. UTC | #3
2018-08-02 20:52 GMT+02:00, Sergey Lavrushkin <dualfal@gmail.com>:
> This patch adds two floating-point gray formats to use them in sr filter for
> conversion with libswscale. I added conversion from uint gray to float and
> backwards in swscale_unscaled.c, that is enough for sr filter. But for
> proper format addition, should I add anything else?

I would have expected the conversion to be from and to GRAY16, is
this simply a wrong assumption?

Carl Eugen
Michael Niedermayer Aug. 3, 2018, 1:07 p.m. UTC | #4
On Thu, Aug 02, 2018 at 09:52:45PM +0300, Sergey Lavrushkin wrote:
> This patch adds two floating-point gray formats to use them in sr filter for 
> conversion with libswscale. I added conversion from uint gray to float and
> backwards in swscale_unscaled.c, that is enough for sr filter. But for
> proper format addition, should I add anything else?
> 
> ---
>  libavutil/pixdesc.c           | 22 ++++++++++++++++++
>  libavutil/pixfmt.h            |  5 ++++
>  libswscale/swscale_internal.h |  7 ++++++
>  libswscale/swscale_unscaled.c | 54 +++++++++++++++++++++++++++++++++++++++++--
>  libswscale/utils.c            |  5 +++-

please split this in a patch or libavutil and one for libswscale
they also need some version.h bump

also fate tests need an update, (make fate) fails otherwise, the update should
be part of the patch that causes the failure otherwise


>  5 files changed, 90 insertions(+), 3 deletions(-)
> 
> diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
> index 96e079584a..7d307d9120 100644
> --- a/libavutil/pixdesc.c
> +++ b/libavutil/pixdesc.c
> @@ -2198,6 +2198,28 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
>          .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA |
>                   AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT,
>      },
> +    [AV_PIX_FMT_GRAYF32BE] = {
> +        .name = "grayf32be",
> +        .nb_components = 1,
> +        .log2_chroma_w = 0,
> +        .log2_chroma_h = 0,
> +        .comp = {
> +            { 0, 4, 0, 0, 32, 3, 31, 1 },       /* Y */
> +        },
> +        .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_FLOAT,
> +        .alias = "yf32be",
> +    },
> +    [AV_PIX_FMT_GRAYF32LE] = {
> +        .name = "grayf32le",
> +        .nb_components = 1,
> +        .log2_chroma_w = 0,
> +        .log2_chroma_h = 0,
> +        .comp = {
> +            { 0, 4, 0, 0, 32, 3, 31, 1 },       /* Y */
> +        },
> +        .flags = AV_PIX_FMT_FLAG_FLOAT,
> +        .alias = "yf32le",
> +    },
>      [AV_PIX_FMT_DRM_PRIME] = {
>          .name = "drm_prime",
>          .flags = AV_PIX_FMT_FLAG_HWACCEL,

> diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
> index 2b3307845e..aa9a4f60c1 100644
> --- a/libavutil/pixfmt.h
> +++ b/libavutil/pixfmt.h
> @@ -320,6 +320,9 @@ enum AVPixelFormat {
>      AV_PIX_FMT_GBRAPF32BE, ///< IEEE-754 single precision planar GBRA 4:4:4:4, 128bpp, big-endian
>      AV_PIX_FMT_GBRAPF32LE, ///< IEEE-754 single precision planar GBRA 4:4:4:4, 128bpp, little-endian
>  
> +    AV_PIX_FMT_GRAYF32BE,  ///< IEEE-754 single precision Y, 32bpp, big-endian
> +    AV_PIX_FMT_GRAYF32LE,  ///< IEEE-754 single precision Y, 32bpp, little-endian
> +
>      /**
>       * DRM-managed buffers exposed through PRIME buffer sharing.
>       *

new enum values can only be added in such a way that no value of an existing
enum changes. This would change the value of the following enums



> @@ -405,6 +408,8 @@ enum AVPixelFormat {
>  #define AV_PIX_FMT_GBRPF32    AV_PIX_FMT_NE(GBRPF32BE,  GBRPF32LE)
>  #define AV_PIX_FMT_GBRAPF32   AV_PIX_FMT_NE(GBRAPF32BE, GBRAPF32LE)
>  
> +#define AV_PIX_FMT_GRAYF32 AV_PIX_FMT_NE(GRAYF32BE, GRAYF32LE)
> +
>  #define AV_PIX_FMT_YUVA420P9  AV_PIX_FMT_NE(YUVA420P9BE , YUVA420P9LE)
>  #define AV_PIX_FMT_YUVA422P9  AV_PIX_FMT_NE(YUVA422P9BE , YUVA422P9LE)
>  #define AV_PIX_FMT_YUVA444P9  AV_PIX_FMT_NE(YUVA444P9BE , YUVA444P9LE)
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 1703856ab2..4a2cdfe658 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -764,6 +764,13 @@ static av_always_inline int isAnyRGB(enum AVPixelFormat pix_fmt)
>              pix_fmt == AV_PIX_FMT_MONOBLACK || pix_fmt == AV_PIX_FMT_MONOWHITE;
>  }
>  
> +static av_always_inline int isFloat(enum AVPixelFormat pix_fmt)
> +{
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
> +    av_assert0(desc);
> +    return desc->flags & AV_PIX_FMT_FLAG_FLOAT;
> +}
> +
>  static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
>  {
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);

> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
> index 6480070cbf..f5b4c9be9d 100644
> --- a/libswscale/swscale_unscaled.c
> +++ b/libswscale/swscale_unscaled.c
> @@ -1467,6 +1467,46 @@ static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
>      return srcSliceH;
>  }
>  
> +static int uint_y_to_float_y_wrapper(SwsContext *c, const uint8_t *src[],
> +                                     int srcStride[], int srcSliceY,
> +                                     int srcSliceH, uint8_t *dst[], int dstStride[])
> +{
> +    int y, x;
> +    int dstStrideFloat = dstStride[0] >> 2;;

theres a ; too much
also newly added strides should probably be ptrdiff_t


> +    const uint8_t *srcPtr = src[0];
> +    float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY);
> +
> +    for (y = 0; y < srcSliceH; ++y){
> +        for (x = 0; x < c->srcW; ++x){
> +            dstPtr[x] = (float)srcPtr[x] / 255.0f;

division is slow. This should either be a multiplication with the
inverse or a LUT with 8bit index changing to float.

The faster of them should be used

thx

[...]
Sergey Lavrushkin Aug. 3, 2018, 7:33 p.m. UTC | #5
2018-08-03 16:07 GMT+03:00 Michael Niedermayer <michael@niedermayer.cc>:

> On Thu, Aug 02, 2018 at 09:52:45PM +0300, Sergey Lavrushkin wrote:
> > This patch adds two floating-point gray formats to use them in sr filter
> for
> > conversion with libswscale. I added conversion from uint gray to float
> and
> > backwards in swscale_unscaled.c, that is enough for sr filter. But for
> > proper format addition, should I add anything else?
> >
> > ---
> >  libavutil/pixdesc.c           | 22 ++++++++++++++++++
> >  libavutil/pixfmt.h            |  5 ++++
> >  libswscale/swscale_internal.h |  7 ++++++
> >  libswscale/swscale_unscaled.c | 54 ++++++++++++++++++++++++++++++
> +++++++++++--
> >  libswscale/utils.c            |  5 +++-
>
> please split this in a patch or libavutil and one for libswscale
> they also need some version.h bump
>

Ok.

also fate tests need an update, (make fate) fails otherwise, the update
> should
> be part of the patch that causes the failure otherwise


In one test for these formats I get:

filter-pixfmts-scale
grayf32be           grayf32le           monob
 f01cb0b623357387827902d9d0963435

I guess, it is because I only implemented conversion in swscale_unscaled.
What can I do to fix it? Should I implement conversion for scaling or maybe
change something in the test, so it would not check these formats (if it is
possible).
Anyway, I need to know what changes should I do and where.


> >  5 files changed, 90 insertions(+), 3 deletions(-)
> >
> > diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
> > index 96e079584a..7d307d9120 100644
> > --- a/libavutil/pixdesc.c
> > +++ b/libavutil/pixdesc.c
> > @@ -2198,6 +2198,28 @@ static const AVPixFmtDescriptor
> av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
> >          .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA |
> >                   AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT,
> >      },
> > +    [AV_PIX_FMT_GRAYF32BE] = {
> > +        .name = "grayf32be",
> > +        .nb_components = 1,
> > +        .log2_chroma_w = 0,
> > +        .log2_chroma_h = 0,
> > +        .comp = {
> > +            { 0, 4, 0, 0, 32, 3, 31, 1 },       /* Y */
> > +        },
> > +        .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_FLOAT,
> > +        .alias = "yf32be",
> > +    },
> > +    [AV_PIX_FMT_GRAYF32LE] = {
> > +        .name = "grayf32le",
> > +        .nb_components = 1,
> > +        .log2_chroma_w = 0,
> > +        .log2_chroma_h = 0,
> > +        .comp = {
> > +            { 0, 4, 0, 0, 32, 3, 31, 1 },       /* Y */
> > +        },
> > +        .flags = AV_PIX_FMT_FLAG_FLOAT,
> > +        .alias = "yf32le",
> > +    },
> >      [AV_PIX_FMT_DRM_PRIME] = {
> >          .name = "drm_prime",
> >          .flags = AV_PIX_FMT_FLAG_HWACCEL,
>
> > diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
> > index 2b3307845e..aa9a4f60c1 100644
> > --- a/libavutil/pixfmt.h
> > +++ b/libavutil/pixfmt.h
> > @@ -320,6 +320,9 @@ enum AVPixelFormat {
> >      AV_PIX_FMT_GBRAPF32BE, ///< IEEE-754 single precision planar GBRA
> 4:4:4:4, 128bpp, big-endian
> >      AV_PIX_FMT_GBRAPF32LE, ///< IEEE-754 single precision planar GBRA
> 4:4:4:4, 128bpp, little-endian
> >
> > +    AV_PIX_FMT_GRAYF32BE,  ///< IEEE-754 single precision Y, 32bpp,
> big-endian
> > +    AV_PIX_FMT_GRAYF32LE,  ///< IEEE-754 single precision Y, 32bpp,
> little-endian
> > +
> >      /**
> >       * DRM-managed buffers exposed through PRIME buffer sharing.
> >       *
>
> new enum values can only be added in such a way that no value of an
> existing
> enum changes. This would change the value of the following enums


Ok.

> @@ -405,6 +408,8 @@ enum AVPixelFormat {
> >  #define AV_PIX_FMT_GBRPF32    AV_PIX_FMT_NE(GBRPF32BE,  GBRPF32LE)
> >  #define AV_PIX_FMT_GBRAPF32   AV_PIX_FMT_NE(GBRAPF32BE, GBRAPF32LE)
> >
> > +#define AV_PIX_FMT_GRAYF32 AV_PIX_FMT_NE(GRAYF32BE, GRAYF32LE)
> > +
> >  #define AV_PIX_FMT_YUVA420P9  AV_PIX_FMT_NE(YUVA420P9BE , YUVA420P9LE)
> >  #define AV_PIX_FMT_YUVA422P9  AV_PIX_FMT_NE(YUVA422P9BE , YUVA422P9LE)
> >  #define AV_PIX_FMT_YUVA444P9  AV_PIX_FMT_NE(YUVA444P9BE , YUVA444P9LE)
> > diff --git a/libswscale/swscale_internal.h
> b/libswscale/swscale_internal.h
> > index 1703856ab2..4a2cdfe658 100644
> > --- a/libswscale/swscale_internal.h
> > +++ b/libswscale/swscale_internal.h
> > @@ -764,6 +764,13 @@ static av_always_inline int isAnyRGB(enum
> AVPixelFormat pix_fmt)
> >              pix_fmt == AV_PIX_FMT_MONOBLACK || pix_fmt ==
> AV_PIX_FMT_MONOWHITE;
> >  }
> >
> > +static av_always_inline int isFloat(enum AVPixelFormat pix_fmt)
> > +{
> > +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
> > +    av_assert0(desc);
> > +    return desc->flags & AV_PIX_FMT_FLAG_FLOAT;
> > +}
> > +
> >  static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
> >  {
> >      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
>
> > diff --git a/libswscale/swscale_unscaled.c
> b/libswscale/swscale_unscaled.c
> > index 6480070cbf..f5b4c9be9d 100644
> > --- a/libswscale/swscale_unscaled.c
> > +++ b/libswscale/swscale_unscaled.c
> > @@ -1467,6 +1467,46 @@ static int yvu9ToYv12Wrapper(SwsContext *c, const
> uint8_t *src[],
> >      return srcSliceH;
> >  }
> >
> > +static int uint_y_to_float_y_wrapper(SwsContext *c, const uint8_t
> *src[],
> > +                                     int srcStride[], int srcSliceY,
> > +                                     int srcSliceH, uint8_t *dst[], int
> dstStride[])
> > +{
> > +    int y, x;
> > +    int dstStrideFloat = dstStride[0] >> 2;;
>
> theres a ; too much
> also newly added strides should probably be ptrdiff_t


Ok.

> +    const uint8_t *srcPtr = src[0];
> > +    float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY);
> > +
> > +    for (y = 0; y < srcSliceH; ++y){
> > +        for (x = 0; x < c->srcW; ++x){
> > +            dstPtr[x] = (float)srcPtr[x] / 255.0f;
>
> division is slow. This should either be a multiplication with the
> inverse or a LUT with 8bit index changing to float.
>
> The faster of them should be used
>

LUT seems to be faster. Can I place it in SwsContext and initialize it in
sws_init_context when necessary?
Michael Niedermayer Aug. 3, 2018, 9:11 p.m. UTC | #6
On Fri, Aug 03, 2018 at 10:33:00PM +0300, Sergey Lavrushkin wrote:
> 2018-08-03 16:07 GMT+03:00 Michael Niedermayer <michael@niedermayer.cc>:
> 
> > On Thu, Aug 02, 2018 at 09:52:45PM +0300, Sergey Lavrushkin wrote:
> > > This patch adds two floating-point gray formats to use them in sr filter
> > for
> > > conversion with libswscale. I added conversion from uint gray to float
> > and
> > > backwards in swscale_unscaled.c, that is enough for sr filter. But for
> > > proper format addition, should I add anything else?
> > >
> > > ---
> > >  libavutil/pixdesc.c           | 22 ++++++++++++++++++
> > >  libavutil/pixfmt.h            |  5 ++++
> > >  libswscale/swscale_internal.h |  7 ++++++
> > >  libswscale/swscale_unscaled.c | 54 ++++++++++++++++++++++++++++++
> > +++++++++++--
> > >  libswscale/utils.c            |  5 +++-
> >
> > please split this in a patch or libavutil and one for libswscale
> > they also need some version.h bump
> >
> 
> Ok.
> 
> also fate tests need an update, (make fate) fails otherwise, the update
> > should
> > be part of the patch that causes the failure otherwise
> 
> 
> In one test for these formats I get:
> 
> filter-pixfmts-scale
> grayf32be           grayf32le           monob
>  f01cb0b623357387827902d9d0963435
> 
> I guess, it is because I only implemented conversion in swscale_unscaled.
> What can I do to fix it? Should I implement conversion for scaling or maybe
> change something in the test, so it would not check these formats (if it is
> possible).
> Anyway, I need to know what changes should I do and where.

well, swscale shouldnt really have formats only half supported
so for any supported format in and out it should work with any
width / height in / out

Theres a wide range of possibilities how to implement this.
The correct / ideal way is of course to implement a full floating point path
for scaling along side the integer code.
a simpler aprouch would be to convert from/to float to/from  integers and use
the existing code. (this of course has the disadvantage of loosing precission)


[...]
> > +    const uint8_t *srcPtr = src[0];
> > > +    float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY);
> > > +
> > > +    for (y = 0; y < srcSliceH; ++y){
> > > +        for (x = 0; x < c->srcW; ++x){
> > > +            dstPtr[x] = (float)srcPtr[x] / 255.0f;
> >
> > division is slow. This should either be a multiplication with the
> > inverse or a LUT with 8bit index changing to float.
> >
> > The faster of them should be used
> >
> 
> LUT seems to be faster. Can I place it in SwsContext and initialize it in
> sws_init_context when necessary?

yes of course

thanks

[...]
Sergey Lavrushkin Aug. 3, 2018, 10:17 p.m. UTC | #7
2018-08-04 0:11 GMT+03:00 Michael Niedermayer <michael@niedermayer.cc>:

> On Fri, Aug 03, 2018 at 10:33:00PM +0300, Sergey Lavrushkin wrote:
> > 2018-08-03 16:07 GMT+03:00 Michael Niedermayer <michael@niedermayer.cc>:
> >
> > > On Thu, Aug 02, 2018 at 09:52:45PM +0300, Sergey Lavrushkin wrote:
> > > > This patch adds two floating-point gray formats to use them in sr
> filter
> > > for
> > > > conversion with libswscale. I added conversion from uint gray to
> float
> > > and
> > > > backwards in swscale_unscaled.c, that is enough for sr filter. But
> for
> > > > proper format addition, should I add anything else?
> > > >
> > > > ---
> > > >  libavutil/pixdesc.c           | 22 ++++++++++++++++++
> > > >  libavutil/pixfmt.h            |  5 ++++
> > > >  libswscale/swscale_internal.h |  7 ++++++
> > > >  libswscale/swscale_unscaled.c | 54 ++++++++++++++++++++++++++++++
> > > +++++++++++--
> > > >  libswscale/utils.c            |  5 +++-
> > >
> > > please split this in a patch or libavutil and one for libswscale
> > > they also need some version.h bump
> > >
> >
> > Ok.
> >
> > also fate tests need an update, (make fate) fails otherwise, the update
> > > should
> > > be part of the patch that causes the failure otherwise
> >
> >
> > In one test for these formats I get:
> >
> > filter-pixfmts-scale
> > grayf32be           grayf32le           monob
> >  f01cb0b623357387827902d9d0963435
> >
> > I guess, it is because I only implemented conversion in swscale_unscaled.
> > What can I do to fix it? Should I implement conversion for scaling or
> maybe
> > change something in the test, so it would not check these formats (if it
> is
> > possible).
> > Anyway, I need to know what changes should I do and where.
>
> well, swscale shouldnt really have formats only half supported
> so for any supported format in and out it should work with any
> width / height in / out
>
> Theres a wide range of possibilities how to implement this.
> The correct / ideal way is of course to implement a full floating point
> path
> for scaling along side the integer code.
> a simpler aprouch would be to convert from/to float to/from  integers and
> use
> the existing code. (this of course has the disadvantage of loosing
> precission)
>

Well, I want to implement simpler approach, as I still have to finish
correcting sr filter.
But I need some explanations regarding what I should add. If I understand
correcly,
I need to add conversion from float to the ff_sws_init_input_funcs function
in libswscale/input.c
and conversion to float to the ff_sws_init_output_funcs function in
libswscale/output.c
If I am not mistaken, in the first case I need to provide c->lumToYV12 and
in the second case -
yuv2plane1 and yuv2planeX. So, in the first case, to what format should I
add
conversion, specifically what number of bits per pixel should be used? As I
look through other
conversion functions, it seems that somewhere uint8 is used and somewhere -
uint16.
Is it somehow determined later during scaling? If I am going to convert to
uint8 from
my float format, should I define it somewhere, that I am converting to
uint8?
And in the second case, I don't completely understand, what these two
functions are
doing, especially tha last one with filters. Is it also just simple
conversions or
these functions also cover something else? And in their descriptions it is
written, that:

 * @param src     scaled source data, 15 bits for 8-10-bit output,
 *                19 bits for 16-bit output (in int32_t)
 * @param dest    pointer to the output plane. For >8-bit
 *                output, this is in uint16_t

In my case, the output is 32-bit. Does this mean, that float type,
basically, is not
supported and I also have to modify something in scaling? If so, what
should I add?



> [...]
> > > +    const uint8_t *srcPtr = src[0];
> > > > +    float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY);
> > > > +
> > > > +    for (y = 0; y < srcSliceH; ++y){
> > > > +        for (x = 0; x < c->srcW; ++x){
> > > > +            dstPtr[x] = (float)srcPtr[x] / 255.0f;
> > >
> > > division is slow. This should either be a multiplication with the
> > > inverse or a LUT with 8bit index changing to float.
> > >
> > > The faster of them should be used
> > >
> >
> > LUT seems to be faster. Can I place it in SwsContext and initialize it in
> > sws_init_context when necessary?
>
> yes of course
>
> thanks
>
>
Reto Kromer Aug. 4, 2018, 6:23 a.m. UTC | #8
Sergey Lavrushkin wrote:

>2018-08-03 16:07 GMT+03:00 Michael Niedermayer
><michael@niedermayer.cc>:

[...]

>>division is slow. This should either be a multiplication with
>>the inverse or a LUT with 8bit index changing to float.
>>
>>The faster of them should be used
>
>LUT seems to be faster.

I am not surprised. In my experience, LUTs are often the faster
solution.

Best regards, Reto
Michael Niedermayer Aug. 4, 2018, 1:10 p.m. UTC | #9
Hi

On Sat, Aug 04, 2018 at 01:17:53AM +0300, Sergey Lavrushkin wrote:
> 2018-08-04 0:11 GMT+03:00 Michael Niedermayer <michael@niedermayer.cc>:
> 
> > On Fri, Aug 03, 2018 at 10:33:00PM +0300, Sergey Lavrushkin wrote:
> > > 2018-08-03 16:07 GMT+03:00 Michael Niedermayer <michael@niedermayer.cc>:
> > >
> > > > On Thu, Aug 02, 2018 at 09:52:45PM +0300, Sergey Lavrushkin wrote:
> > > > > This patch adds two floating-point gray formats to use them in sr
> > filter
> > > > for
> > > > > conversion with libswscale. I added conversion from uint gray to
> > float
> > > > and
> > > > > backwards in swscale_unscaled.c, that is enough for sr filter. But
> > for
> > > > > proper format addition, should I add anything else?
> > > > >
> > > > > ---
> > > > >  libavutil/pixdesc.c           | 22 ++++++++++++++++++
> > > > >  libavutil/pixfmt.h            |  5 ++++
> > > > >  libswscale/swscale_internal.h |  7 ++++++
> > > > >  libswscale/swscale_unscaled.c | 54 ++++++++++++++++++++++++++++++
> > > > +++++++++++--
> > > > >  libswscale/utils.c            |  5 +++-
> > > >
> > > > please split this in a patch or libavutil and one for libswscale
> > > > they also need some version.h bump
> > > >
> > >
> > > Ok.
> > >
> > > also fate tests need an update, (make fate) fails otherwise, the update
> > > > should
> > > > be part of the patch that causes the failure otherwise
> > >
> > >
> > > In one test for these formats I get:
> > >
> > > filter-pixfmts-scale
> > > grayf32be           grayf32le           monob
> > >  f01cb0b623357387827902d9d0963435
> > >
> > > I guess, it is because I only implemented conversion in swscale_unscaled.
> > > What can I do to fix it? Should I implement conversion for scaling or
> > maybe
> > > change something in the test, so it would not check these formats (if it
> > is
> > > possible).
> > > Anyway, I need to know what changes should I do and where.
> >
> > well, swscale shouldnt really have formats only half supported
> > so for any supported format in and out it should work with any
> > width / height in / out
> >
> > Theres a wide range of possibilities how to implement this.
> > The correct / ideal way is of course to implement a full floating point
> > path
> > for scaling along side the integer code.
> > a simpler aprouch would be to convert from/to float to/from  integers and
> > use
> > the existing code. (this of course has the disadvantage of loosing
> > precission)
> >
> 
> Well, I want to implement simpler approach, as I still have to finish
> correcting sr filter.
> But I need some explanations regarding what I should add. If I understand
> correcly,

> I need to add conversion from float to the ff_sws_init_input_funcs function
> in libswscale/input.c
> and conversion to float to the ff_sws_init_output_funcs function in
> libswscale/output.c
> If I am not mistaken, in the first case I need to provide c->lumToYV12 and
> in the second case -
> yuv2plane1 and yuv2planeX.

yes that sounds correct


> So, in the first case, to what format should I
> add
> conversion, specifically what number of bits per pixel should be used? As I
> look through other
> conversion functions, it seems that somewhere uint8 is used and somewhere -
> uint16.

you should try to maintain as many bits as possible, so a path similar to
what 16bit formats use would be best


> Is it somehow determined later during scaling? If I am going to convert to
> uint8 from
> my float format, should I define it somewhere, that I am converting to
> uint8?
> And in the second case, I don't completely understand, what these two
> functions are
> doing, especially tha last one with filters. Is it also just simple
> conversions or
> these functions also cover something else? And in their descriptions it is
> written, that:
> 
>  * @param src     scaled source data, 15 bits for 8-10-bit output,
>  *                19 bits for 16-bit output (in int32_t)
>  * @param dest    pointer to the output plane. For >8-bit
>  *                output, this is in uint16_t

also see doc/swscale.txt
note this text is IIRC older than the current swscale code so it
may be missing some things but the general architecture should be correct

also if you notice any errors or omissions in the documentation then
patches improving it are certainly welcome!


> 
> In my case, the output is 32-bit. Does this mean, that float type,
> basically, is not
> supported and I also have to modify something in scaling? If so, what
> should I add?

when input gets converted to the internal integer format and that gets
convverted back to floats at output then scaling should be fine as it
wouldnt "know" there are floats outside

thanks

[...]
diff mbox

Patch

diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 96e079584a..7d307d9120 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -2198,6 +2198,28 @@  static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA |
                  AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT,
     },
+    [AV_PIX_FMT_GRAYF32BE] = {
+        .name = "grayf32be",
+        .nb_components = 1,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 4, 0, 0, 32, 3, 31, 1 },       /* Y */
+        },
+        .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_FLOAT,
+        .alias = "yf32be",
+    },
+    [AV_PIX_FMT_GRAYF32LE] = {
+        .name = "grayf32le",
+        .nb_components = 1,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 4, 0, 0, 32, 3, 31, 1 },       /* Y */
+        },
+        .flags = AV_PIX_FMT_FLAG_FLOAT,
+        .alias = "yf32le",
+    },
     [AV_PIX_FMT_DRM_PRIME] = {
         .name = "drm_prime",
         .flags = AV_PIX_FMT_FLAG_HWACCEL,
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 2b3307845e..aa9a4f60c1 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -320,6 +320,9 @@  enum AVPixelFormat {
     AV_PIX_FMT_GBRAPF32BE, ///< IEEE-754 single precision planar GBRA 4:4:4:4, 128bpp, big-endian
     AV_PIX_FMT_GBRAPF32LE, ///< IEEE-754 single precision planar GBRA 4:4:4:4, 128bpp, little-endian
 
+    AV_PIX_FMT_GRAYF32BE,  ///< IEEE-754 single precision Y, 32bpp, big-endian
+    AV_PIX_FMT_GRAYF32LE,  ///< IEEE-754 single precision Y, 32bpp, little-endian
+
     /**
      * DRM-managed buffers exposed through PRIME buffer sharing.
      *
@@ -405,6 +408,8 @@  enum AVPixelFormat {
 #define AV_PIX_FMT_GBRPF32    AV_PIX_FMT_NE(GBRPF32BE,  GBRPF32LE)
 #define AV_PIX_FMT_GBRAPF32   AV_PIX_FMT_NE(GBRAPF32BE, GBRAPF32LE)
 
+#define AV_PIX_FMT_GRAYF32 AV_PIX_FMT_NE(GRAYF32BE, GRAYF32LE)
+
 #define AV_PIX_FMT_YUVA420P9  AV_PIX_FMT_NE(YUVA420P9BE , YUVA420P9LE)
 #define AV_PIX_FMT_YUVA422P9  AV_PIX_FMT_NE(YUVA422P9BE , YUVA422P9LE)
 #define AV_PIX_FMT_YUVA444P9  AV_PIX_FMT_NE(YUVA444P9BE , YUVA444P9LE)
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 1703856ab2..4a2cdfe658 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -764,6 +764,13 @@  static av_always_inline int isAnyRGB(enum AVPixelFormat pix_fmt)
             pix_fmt == AV_PIX_FMT_MONOBLACK || pix_fmt == AV_PIX_FMT_MONOWHITE;
 }
 
+static av_always_inline int isFloat(enum AVPixelFormat pix_fmt)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    av_assert0(desc);
+    return desc->flags & AV_PIX_FMT_FLAG_FLOAT;
+}
+
 static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 6480070cbf..f5b4c9be9d 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1467,6 +1467,46 @@  static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
+static int uint_y_to_float_y_wrapper(SwsContext *c, const uint8_t *src[],
+                                     int srcStride[], int srcSliceY,
+                                     int srcSliceH, uint8_t *dst[], int dstStride[])
+{
+    int y, x;
+    int dstStrideFloat = dstStride[0] >> 2;;
+    const uint8_t *srcPtr = src[0];
+    float *dstPtr = (float *)(dst[0] + dstStride[0] * srcSliceY);
+
+    for (y = 0; y < srcSliceH; ++y){
+        for (x = 0; x < c->srcW; ++x){
+            dstPtr[x] = (float)srcPtr[x] / 255.0f;
+        }
+        srcPtr += srcStride[0];
+        dstPtr += dstStrideFloat;
+    }
+
+    return srcSliceH;
+}
+
+static int float_y_to_uint_y_wrapper(SwsContext *c, const uint8_t* src[],
+                                     int srcStride[], int srcSliceY,
+                                     int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+    int y, x;
+    int srcStrideFloat = srcStride[0] >> 2;
+    const float *srcPtr = (const float *)src[0];
+    uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY;
+
+    for (y = 0; y < srcSliceH; ++y){
+        for (x = 0; x < c->srcW; ++x){
+            dstPtr[x] = (uint8_t)(255.0f * FFMIN(FFMAX(srcPtr[x], 0.0f), 1.0f));
+        }
+        srcPtr += srcStrideFloat;
+        dstPtr += dstStride[0];
+    }
+
+    return srcSliceH;
+}
+
 /* unscaled copy like stuff (assumes nearly identical formats) */
 static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
                              int srcStride[], int srcSliceY, int srcSliceH,
@@ -1899,6 +1939,16 @@  void ff_get_unscaled_swscale(SwsContext *c)
             c->swscale = yuv422pToUyvyWrapper;
     }
 
+    /* uint Y to float Y */
+    if (srcFormat == AV_PIX_FMT_GRAY8 && dstFormat == AV_PIX_FMT_GRAYF32){
+        c->swscale = uint_y_to_float_y_wrapper;
+    }
+
+    /* float Y to uint Y */
+    if (srcFormat == AV_PIX_FMT_GRAYF32 && dstFormat == AV_PIX_FMT_GRAY8){
+        c->swscale = float_y_to_uint_y_wrapper;
+    }
+
     /* LQ converters if -sws 0 or -sws 4*/
     if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) {
         /* yv12_to_yuy2 */
@@ -1925,13 +1975,13 @@  void ff_get_unscaled_swscale(SwsContext *c)
     if ( srcFormat == dstFormat ||
         (srcFormat == AV_PIX_FMT_YUVA420P && dstFormat == AV_PIX_FMT_YUV420P) ||
         (srcFormat == AV_PIX_FMT_YUV420P && dstFormat == AV_PIX_FMT_YUVA420P) ||
-        (isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) ||
+        (isFloat(srcFormat) == isFloat(dstFormat)) && ((isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) ||
         (isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) ||
         (isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) ||
         (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) &&
          c->chrDstHSubSample == c->chrSrcHSubSample &&
          c->chrDstVSubSample == c->chrSrcVSubSample &&
-         !isSemiPlanarYUV(srcFormat) && !isSemiPlanarYUV(dstFormat)))
+         !isSemiPlanarYUV(srcFormat) && !isSemiPlanarYUV(dstFormat))))
     {
         if (isPacked(c->srcFormat))
             c->swscale = packedCopyWrapper;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 61b47182f8..dffb236724 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -258,6 +258,8 @@  static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_P010BE]      = { 1, 1 },
     [AV_PIX_FMT_P016LE]      = { 1, 1 },
     [AV_PIX_FMT_P016BE]      = { 1, 1 },
+    [AV_PIX_FMT_GRAYF32LE]    = { 1, 1 },
+    [AV_PIX_FMT_GRAYF32BE]    = { 1, 1 },
 };
 
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -1793,7 +1795,8 @@  av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
 
     /* unscaled special cases */
     if (unscaled && !usesHFilter && !usesVFilter &&
-        (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) {
+        (c->srcRange == c->dstRange || isAnyRGB(dstFormat) ||
+         srcFormat == AV_PIX_FMT_GRAYF32 || dstFormat == AV_PIX_FMT_GRAYF32)) {
         ff_get_unscaled_swscale(c);
 
         if (c->swscale) {