diff mbox

[FFmpeg-devel,3/4] avfilter/vf_dnn_processing: add format GRAY8 and GRAYF32 support

Message ID 1574409011-15833-1-git-send-email-yejun.guo@intel.com
State New
Headers show

Commit Message

Guo, Yejun Nov. 22, 2019, 7:50 a.m. UTC
Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 doc/filters.texi                |   8 ++-
 libavfilter/vf_dnn_processing.c | 147 ++++++++++++++++++++++++++++++----------
 2 files changed, 118 insertions(+), 37 deletions(-)

Comments

Pedro Arthur Dec. 12, 2019, 4:45 p.m. UTC | #1
Hi,

how should I test this patch?

Em sex., 22 de nov. de 2019 às 04:57, Guo, Yejun <yejun.guo@intel.com>
escreveu:

> Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> ---
>  doc/filters.texi                |   8 ++-
>  libavfilter/vf_dnn_processing.c | 147
> ++++++++++++++++++++++++++++++----------
>  2 files changed, 118 insertions(+), 37 deletions(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 1f86ae1..c3f7997 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -8992,7 +8992,13 @@ Set the input name of the dnn network.
>  Set the output name of the dnn network.
>
>  @item fmt
> -Set the pixel format for the Frame. Allowed values are
> @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.
> +Set the pixel format for the Frame, the value is determined by the input
> of the dnn network model.
>
This sentence is a bit confusing, also I think this property should be
removed. (I will explain bellow).

+
> +If the model handles RGB (or BGR) image and the data type of model input
> is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX_FMT_BGR24}.
> +If the model handles RGB (or BGR) image and the data type of model input
> is float, fmt must be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX_FMT_BGR24},
> and this filter will do data type conversion internally.
> +If the model handles GRAY image and the data type of model input is
> uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.
> +If the model handles GRAY image and the data type of model input is
> float, fmt must be @code{AV_PIX_FMT_GRAYF32}.
> +
>  Default value is @code{AV_PIX_FMT_RGB24}.
>
>  @end table
> diff --git a/libavfilter/vf_dnn_processing.c
> b/libavfilter/vf_dnn_processing.c
> index ce976ec..963dd5e 100644
> --- a/libavfilter/vf_dnn_processing.c
> +++ b/libavfilter/vf_dnn_processing.c
> @@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext *context)
>  {
>      DnnProcessingContext *ctx = context->priv;
>      int supported = 0;
> -    // as the first step, only rgb24 and bgr24 are supported
> +    // to support more formats
>      const enum AVPixelFormat supported_pixel_fmts[] = {
>          AV_PIX_FMT_RGB24,
>          AV_PIX_FMT_BGR24,
> +        AV_PIX_FMT_GRAY8,
> +        AV_PIX_FMT_GRAYF32,
>      };
>      for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum
> AVPixelFormat); ++i) {
>          if (supported_pixel_fmts[i] == ctx->fmt) {
> @@ -156,14 +158,38 @@ static int config_input(AVFilterLink *inlink)
>          return AVERROR(EIO);
>      }
>
I think the filter should not check formats manually in the init function
(unless I'm missing something), it would be best if you query for all the
above supported formats in query_formats and later in config_input you make
sure the expected model format matches the frame format.


> -    if (model_input.channels != 3) {
> -        av_log(ctx, AV_LOG_ERROR, "the model requires input channels
> %d\n",
> -                                   model_input.channels);
> -        return AVERROR(EIO);
> -    }
> -    if (model_input.dt != DNN_FLOAT && model_input.dt != DNN_UINT8) {
> -        av_log(ctx, AV_LOG_ERROR, "only support dnn models with input
> data type as float32 and uint8.\n");
> -        return AVERROR(EIO);
> +    if (ctx->fmt == AV_PIX_FMT_RGB24 || ctx->fmt == AV_PIX_FMT_BGR24) {
> +        if (model_input.channels != 3) {
> +            av_log(ctx, AV_LOG_ERROR, "channel number 3 is required, but
> the actual channel number is %d\n",
> +                                       model_input.channels);
> +            return AVERROR(EIO);
> +        }
> +        if (model_input.dt != DNN_FLOAT && model_input.dt != DNN_UINT8) {
> +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input
> data type as float32 and uint8.\n");
> +            return AVERROR(EIO);
> +        }
> +    } else if (ctx->fmt == AV_PIX_FMT_GRAY8) {
> +        if (model_input.channels != 1) {
> +            av_log(ctx, AV_LOG_ERROR, "channel number 1 is required, but
> the actual channel number is %d\n",
> +                                       model_input.channels);
> +            return AVERROR(EIO);
> +        }
> +        if (model_input.dt != DNN_UINT8) {
> +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input
> data type as uint8.\n");
> +            return AVERROR(EIO);
> +        }
> +    } else if (ctx->fmt == AV_PIX_FMT_GRAYF32) {
> +        if (model_input.channels != 1) {
> +            av_log(ctx, AV_LOG_ERROR, "channel number 1 is required, but
> the actual channel number is %d\n",
> +                                       model_input.channels);
> +            return AVERROR(EIO);
> +        }
> +        if (model_input.dt != DNN_FLOAT) {
> +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input
> data type as float.\n");
> +            return AVERROR(EIO);
> +        }
> +    } else {
> +        av_assert0(!"should not reach here.");
>      }
>
General comment on the above and following chained ifs testing pixel
formats, personally,  using switch(fmt) seems more readable.


>
>      ctx->input.width    = inlink->w;
> @@ -203,28 +229,49 @@ static int config_output(AVFilterLink *outlink)
>
>  static int copy_from_frame_to_dnn(DNNData *dnn_input, const AVFrame
> *frame)
>  {
> -    // extend this function to support more formats
> -    av_assert0(frame->format == AV_PIX_FMT_RGB24 || frame->format ==
> AV_PIX_FMT_BGR24);
> -
> -    if (dnn_input->dt == DNN_FLOAT) {
> -        float *dnn_input_data = dnn_input->data;
> -        for (int i = 0; i < frame->height; i++) {
> -            for(int j = 0; j < frame->width * 3; j++) {
> -                int k = i * frame->linesize[0] + j;
> -                int t = i * frame->width * 3 + j;
> -                dnn_input_data[t] = frame->data[0][k] / 255.0f;
> +    if (frame->format == AV_PIX_FMT_RGB24 || frame->format ==
> AV_PIX_FMT_BGR24) {
> +        if (dnn_input->dt == DNN_FLOAT) {
> +            float *dnn_input_data = dnn_input->data;
> +            for (int i = 0; i < frame->height; i++) {
> +                for(int j = 0; j < frame->width * 3; j++) {
> +                    int k = i * frame->linesize[0] + j;
> +                    int t = i * frame->width * 3 + j;
> +                    dnn_input_data[t] = frame->data[0][k] / 255.0f;
> +                }
> +            }
> +        } else {
> +            uint8_t *dnn_input_data = dnn_input->data;
> +            av_assert0(dnn_input->dt == DNN_UINT8);
> +            for (int i = 0; i < frame->height; i++) {
> +                for(int j = 0; j < frame->width * 3; j++) {
> +                    int k = i * frame->linesize[0] + j;
> +                    int t = i * frame->width * 3 + j;
> +                    dnn_input_data[t] = frame->data[0][k];
> +                }
>              }
>          }
> -    } else {
> +    } else if (frame->format == AV_PIX_FMT_GRAY8) {
>          uint8_t *dnn_input_data = dnn_input->data;
>          av_assert0(dnn_input->dt == DNN_UINT8);
>          for (int i = 0; i < frame->height; i++) {
> -            for(int j = 0; j < frame->width * 3; j++) {
> +            for(int j = 0; j < frame->width; j++) {
>                  int k = i * frame->linesize[0] + j;
> -                int t = i * frame->width * 3 + j;
> +                int t = i * frame->width + j;
>                  dnn_input_data[t] = frame->data[0][k];
>              }
>          }
> +    } else if (frame->format == AV_PIX_FMT_GRAYF32) {
> +        float *dnn_input_data = dnn_input->data;
> +        av_assert0(dnn_input->dt == DNN_FLOAT);
> +        for (int i = 0; i < frame->height; i++) {
> +            for(int j = 0; j < frame->width; j++) {
> +                int k = i * frame->linesize[0] + j * sizeof(float);
> +                int t = i * frame->width + j;
> +                dnn_input_data[t] = *(float*)(frame->data[0] + k);
> +            }
> +        }
> +    } else {
> +        av_assert0(!"should not reach here.");
>      }
>
>      return 0;
> @@ -232,28 +279,49 @@ static int copy_from_frame_to_dnn(DNNData
> *dnn_input, const AVFrame *frame)
>
>  static int copy_from_dnn_to_frame(AVFrame *frame, const DNNData
> *dnn_output)
>  {
> -    // extend this function to support more formats
> -    av_assert0(frame->format == AV_PIX_FMT_RGB24 || frame->format ==
> AV_PIX_FMT_BGR24);
> -
> -    if (dnn_output->dt == DNN_FLOAT) {
> -        float *dnn_output_data = dnn_output->data;
> -        for (int i = 0; i < frame->height; i++) {
> -            for(int j = 0; j < frame->width * 3; j++) {
> -                int k = i * frame->linesize[0] + j;
> -                int t = i * frame->width * 3 + j;
> -                frame->data[0][k] =
> av_clip_uintp2((int)(dnn_output_data[t] * 255.0f), 8);
> +    if (frame->format == AV_PIX_FMT_RGB24 || frame->format ==
> AV_PIX_FMT_BGR24) {
> +        if (dnn_output->dt == DNN_FLOAT) {
> +            float *dnn_output_data = dnn_output->data;
> +            for (int i = 0; i < frame->height; i++) {
> +                for(int j = 0; j < frame->width * 3; j++) {
> +                    int k = i * frame->linesize[0] + j;
> +                    int t = i * frame->width * 3 + j;
> +                    frame->data[0][k] =
> av_clip_uintp2((int)(dnn_output_data[t] * 255.0f), 8);
> +                }
> +            }
> +        } else {
> +            uint8_t *dnn_output_data = dnn_output->data;
> +            av_assert0(dnn_output->dt == DNN_UINT8);
> +            for (int i = 0; i < frame->height; i++) {
> +                for(int j = 0; j < frame->width * 3; j++) {
> +                    int k = i * frame->linesize[0] + j;
> +                    int t = i * frame->width * 3 + j;
> +                    frame->data[0][k] = dnn_output_data[t];
> +                }
>              }
>          }
> -    } else {
> +    } else if (frame->format == AV_PIX_FMT_GRAY8) {
>          uint8_t *dnn_output_data = dnn_output->data;
>          av_assert0(dnn_output->dt == DNN_UINT8);
>          for (int i = 0; i < frame->height; i++) {
> -            for(int j = 0; j < frame->width * 3; j++) {
> +            for(int j = 0; j < frame->width; j++) {
>                  int k = i * frame->linesize[0] + j;
> -                int t = i * frame->width * 3 + j;
> +                int t = i * frame->width + j;
>                  frame->data[0][k] = dnn_output_data[t];
>              }
>          }
> +    } else if (frame->format == AV_PIX_FMT_GRAYF32) {
> +        float *dnn_output_data = dnn_output->data;
> +        av_assert0(dnn_output->dt == DNN_FLOAT);
> +        for (int i = 0; i < frame->height; i++) {
> +            for(int j = 0; j < frame->width; j++) {
> +                int k = i * frame->linesize[0] + j * sizeof(float);
> +                int t = i * frame->width + j;
> +                *(float*)(frame->data[0] + k) = dnn_output_data[t];
> +            }
> +        }
> +    } else {
> +        av_assert0(!"should not reach here.");
>      }
>
>      return 0;
> @@ -275,7 +343,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame
> *in)
>          av_frame_free(&in);
>          return AVERROR(EIO);
>      }
> -    av_assert0(ctx->output.channels == 3);
> +
> +    if (ctx->fmt == AV_PIX_FMT_RGB24 || ctx->fmt == AV_PIX_FMT_BGR24) {
> +        av_assert0(ctx->output.channels == 3);
> +    } else if (ctx->fmt == AV_PIX_FMT_GRAY8 || ctx->fmt ==
> AV_PIX_FMT_GRAYF32) {
> +        av_assert0(ctx->output.channels == 1);
> +    } else {
> +        av_assert0(!"should not reach here");
> +    }
>
>      out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
>      if (!out) {
> --
> 2.7.4
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Guo, Yejun Dec. 13, 2019, 11:23 a.m. UTC | #2
> From: Pedro Arthur [mailto:bygrandao@gmail.com]

> Sent: Friday, December 13, 2019 12:45 AM

> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>

> Cc: Guo, Yejun <yejun.guo@intel.com>

> Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add

> format GRAY8 and GRAYF32 support

> Hi,

> 

> how should I test this patch?


the fourth patch of this patch set is the fate test for this feature, so I ignored comments here.
I'll add the test descriptions back in v2.

> 

> Em sex., 22 de nov. de 2019 às 04:57, Guo, Yejun <yejun.guo at intel.com>

> escreveu:

> 

> > Signed-off-by: Guo, Yejun <yejun.guo at intel.com>

> > ---

> >  doc/filters.texi                |   8 ++-

> >  libavfilter/vf_dnn_processing.c | 147

> > ++++++++++++++++++++++++++++++----------

> >  2 files changed, 118 insertions(+), 37 deletions(-)

> >

> > diff --git a/doc/filters.texi b/doc/filters.texi

> > index 1f86ae1..c3f7997 100644

> > --- a/doc/filters.texi

> > +++ b/doc/filters.texi

> > @@ -8992,7 +8992,13 @@ Set the input name of the dnn network.

> >  Set the output name of the dnn network.

> >

> >  @item fmt

> > -Set the pixel format for the Frame. Allowed values are

> > @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.

> > +Set the pixel format for the Frame, the value is determined by the input

> > of the dnn network model.

> >

> This sentence is a bit confusing, also I think this property should be

> removed. (I will explain bellow).


sure, no problem.

> 

> +

> > +If the model handles RGB (or BGR) image and the data type of model input

> > is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or

> @code{AV_PIX_FMT_BGR24}.

> > +If the model handles RGB (or BGR) image and the data type of model input

> > is float, fmt must be @code{AV_PIX_FMT_RGB24} (or

> @code{AV_PIX_FMT_BGR24},

> > and this filter will do data type conversion internally.

> > +If the model handles GRAY image and the data type of model input is

> > uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.

> > +If the model handles GRAY image and the data type of model input is

> > float, fmt must be @code{AV_PIX_FMT_GRAYF32}.

> > +

> >  Default value is @code{AV_PIX_FMT_RGB24}.

> >

> >  @end table

> > diff --git a/libavfilter/vf_dnn_processing.c

> > b/libavfilter/vf_dnn_processing.c

> > index ce976ec..963dd5e 100644

> > --- a/libavfilter/vf_dnn_processing.c

> > +++ b/libavfilter/vf_dnn_processing.c

> > @@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext *context)

> >  {

> >      DnnProcessingContext *ctx = context->priv;

> >      int supported = 0;

> > -    // as the first step, only rgb24 and bgr24 are supported

> > +    // to support more formats

> >      const enum AVPixelFormat supported_pixel_fmts[] = {

> >          AV_PIX_FMT_RGB24,

> >          AV_PIX_FMT_BGR24,

> > +        AV_PIX_FMT_GRAY8,

> > +        AV_PIX_FMT_GRAYF32,

> >      };

> >      for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum

> > AVPixelFormat); ++i) {

> >          if (supported_pixel_fmts[i] == ctx->fmt) {

> > @@ -156,14 +158,38 @@ static int config_input(AVFilterLink *inlink)

> >          return AVERROR(EIO);

> >      }

> >

> I think the filter should not check formats manually in the init function

> (unless I'm missing something), it would be best if you query for all the

> above supported formats in query_formats and later in config_input you make

> sure the expected model format matches the frame format.


I'm afraid it is too late if we find the format mismatch in function config_input.

For example, the dnn module is designed to accept BGR24 data, and the actual
format comes into config_input is RGB24 or YUV420P (we'll add yuv formats later in
supported pixel fmts) or something else such as GRAY8. We have two choices:

- return error, and the application ends.
  This is not what we want.
- return no_error, and do format conversion at the beginning of function filter_frame.
  It makes this filter complex, and our implementation for the conversion might not be the best optimized.
  My idea is to keep this filter simple. And the users can choose what they want to do conversion in the filters graph.

Regarding the below description in doc/filters.texi, the reason is that we do not have format such as rgbf32 and bgrf32, we have to do conversion within this filter.
"If the model handles RGB (or BGR) image and the data type of model input is float, fmt must be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX
_FMT_BGR24}, and this filter will do data type conversion internally."


Consider the filter vf_dnn_analytic in my plan, I think the conversion is necessary since mostly a scale down is needed.

> 

> 

> > -    if (model_input.channels != 3) {

> > -        av_log(ctx, AV_LOG_ERROR, "the model requires input channels

> > %d\n",

> > -                                   model_input.channels);

> > -        return AVERROR(EIO);

> > -    }

> > -    if (model_input.dt != DNN_FLOAT && model_input.dt != DNN_UINT8) {

> > -        av_log(ctx, AV_LOG_ERROR, "only support dnn models with input

> > data type as float32 and uint8.\n");

> > -        return AVERROR(EIO);

> > +    if (ctx->fmt == AV_PIX_FMT_RGB24 || ctx->fmt == AV_PIX_FMT_BGR24)

> {

> > +        if (model_input.channels != 3) {

> > +            av_log(ctx, AV_LOG_ERROR, "channel number 3 is required,

> but

> > the actual channel number is %d\n",

> > +                                       model_input.channels);

> > +            return AVERROR(EIO);

> > +        }

> > +        if (model_input.dt != DNN_FLOAT && model_input.dt !=

> DNN_UINT8) {

> > +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with

> input

> > data type as float32 and uint8.\n");

> > +            return AVERROR(EIO);

> > +        }

> > +    } else if (ctx->fmt == AV_PIX_FMT_GRAY8) {

> > +        if (model_input.channels != 1) {

> > +            av_log(ctx, AV_LOG_ERROR, "channel number 1 is required,

> but

> > the actual channel number is %d\n",

> > +                                       model_input.channels);

> > +            return AVERROR(EIO);

> > +        }

> > +        if (model_input.dt != DNN_UINT8) {

> > +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with

> input

> > data type as uint8.\n");

> > +            return AVERROR(EIO);

> > +        }

> > +    } else if (ctx->fmt == AV_PIX_FMT_GRAYF32) {

> > +        if (model_input.channels != 1) {

> > +            av_log(ctx, AV_LOG_ERROR, "channel number 1 is required,

> but

> > the actual channel number is %d\n",

> > +                                       model_input.channels);

> > +            return AVERROR(EIO);

> > +        }

> > +        if (model_input.dt != DNN_FLOAT) {

> > +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with

> input

> > data type as float.\n");

> > +            return AVERROR(EIO);

> > +        }

> > +    } else {

> > +        av_assert0(!"should not reach here.");

> >      }

> >

> General comment on the above and following chained ifs testing pixel

> formats, personally,  using switch(fmt) seems more readable.


good point, I'll refine the code.
Pedro Arthur Dec. 13, 2019, 2:40 p.m. UTC | #3
Em sex., 13 de dez. de 2019 às 08:23, Guo, Yejun <yejun.guo@intel.com> escreveu:
>
> > From: Pedro Arthur [mailto:bygrandao@gmail.com]
> > Sent: Friday, December 13, 2019 12:45 AM
> > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> > Cc: Guo, Yejun <yejun.guo@intel.com>
> > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add
> > format GRAY8 and GRAYF32 support
> > Hi,
> >
> > how should I test this patch?
>
> the fourth patch of this patch set is the fate test for this feature, so I ignored comments here.
> I'll add the test descriptions back in v2.
>
> >
> > Em sex., 22 de nov. de 2019 às 04:57, Guo, Yejun <yejun.guo at intel.com>
> > escreveu:
> >
> > > Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> > > ---
> > >  doc/filters.texi                |   8 ++-
> > >  libavfilter/vf_dnn_processing.c | 147
> > > ++++++++++++++++++++++++++++++----------
> > >  2 files changed, 118 insertions(+), 37 deletions(-)
> > >
> > > diff --git a/doc/filters.texi b/doc/filters.texi
> > > index 1f86ae1..c3f7997 100644
> > > --- a/doc/filters.texi
> > > +++ b/doc/filters.texi
> > > @@ -8992,7 +8992,13 @@ Set the input name of the dnn network.
> > >  Set the output name of the dnn network.
> > >
> > >  @item fmt
> > > -Set the pixel format for the Frame. Allowed values are
> > > @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.
> > > +Set the pixel format for the Frame, the value is determined by the input
> > > of the dnn network model.
> > >
> > This sentence is a bit confusing, also I think this property should be
> > removed. (I will explain bellow).
>
> sure, no problem.
>
> >
> > +
> > > +If the model handles RGB (or BGR) image and the data type of model input
> > > is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or
> > @code{AV_PIX_FMT_BGR24}.
> > > +If the model handles RGB (or BGR) image and the data type of model input
> > > is float, fmt must be @code{AV_PIX_FMT_RGB24} (or
> > @code{AV_PIX_FMT_BGR24},
> > > and this filter will do data type conversion internally.
> > > +If the model handles GRAY image and the data type of model input is
> > > uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.
> > > +If the model handles GRAY image and the data type of model input is
> > > float, fmt must be @code{AV_PIX_FMT_GRAYF32}.
> > > +
> > >  Default value is @code{AV_PIX_FMT_RGB24}.
> > >
> > >  @end table
> > > diff --git a/libavfilter/vf_dnn_processing.c
> > > b/libavfilter/vf_dnn_processing.c
> > > index ce976ec..963dd5e 100644
> > > --- a/libavfilter/vf_dnn_processing.c
> > > +++ b/libavfilter/vf_dnn_processing.c
> > > @@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext *context)
> > >  {
> > >      DnnProcessingContext *ctx = context->priv;
> > >      int supported = 0;
> > > -    // as the first step, only rgb24 and bgr24 are supported
> > > +    // to support more formats
> > >      const enum AVPixelFormat supported_pixel_fmts[] = {
> > >          AV_PIX_FMT_RGB24,
> > >          AV_PIX_FMT_BGR24,
> > > +        AV_PIX_FMT_GRAY8,
> > > +        AV_PIX_FMT_GRAYF32,
> > >      };
> > >      for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum
> > > AVPixelFormat); ++i) {
> > >          if (supported_pixel_fmts[i] == ctx->fmt) {
> > > @@ -156,14 +158,38 @@ static int config_input(AVFilterLink *inlink)
> > >          return AVERROR(EIO);
> > >      }
> > >
> > I think the filter should not check formats manually in the init function
> > (unless I'm missing something), it would be best if you query for all the
> > above supported formats in query_formats and later in config_input you make
> > sure the expected model format matches the frame format.
>
> I'm afraid it is too late if we find the format mismatch in function config_input.
>
> For example, the dnn module is designed to accept BGR24 data, and the actual
> format comes into config_input is RGB24 or YUV420P (we'll add yuv formats later in
> supported pixel fmts) or something else such as GRAY8. We have two choices:
>
> - return error, and the application ends.
>   This is not what we want.
> - return no_error, and do format conversion at the beginning of function filter_frame.
>   It makes this filter complex, and our implementation for the conversion might not be the best optimized.
>   My idea is to keep this filter simple. And the users can choose what they want to do conversion in the filters graph.
>
Indeed if the filter receives the format already converted is much
better, but if you already have to specify the format the model
expects as a parameter one could simply use the -pix_fmt to set the
format instead of having one more filter parameter.
Is there any downside if it uses "-pix_fmt" that "fmt" avoids?

> Regarding the below description in doc/filters.texi, the reason is that we do not have format such as rgbf32 and bgrf32, we have to do conversion within this filter.
> "If the model handles RGB (or BGR) image and the data type of model input is float, fmt must be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX
> _FMT_BGR24}, and this filter will do data type conversion internally."
>
>
> Consider the filter vf_dnn_analytic in my plan, I think the conversion is necessary since mostly a scale down is needed.
>
> >
> >
> > > -    if (model_input.channels != 3) {
> > > -        av_log(ctx, AV_LOG_ERROR, "the model requires input channels
> > > %d\n",
> > > -                                   model_input.channels);
> > > -        return AVERROR(EIO);
> > > -    }
> > > -    if (model_input.dt != DNN_FLOAT && model_input.dt != DNN_UINT8) {
> > > -        av_log(ctx, AV_LOG_ERROR, "only support dnn models with input
> > > data type as float32 and uint8.\n");
> > > -        return AVERROR(EIO);
> > > +    if (ctx->fmt == AV_PIX_FMT_RGB24 || ctx->fmt == AV_PIX_FMT_BGR24)
> > {
> > > +        if (model_input.channels != 3) {
> > > +            av_log(ctx, AV_LOG_ERROR, "channel number 3 is required,
> > but
> > > the actual channel number is %d\n",
> > > +                                       model_input.channels);
> > > +            return AVERROR(EIO);
> > > +        }
> > > +        if (model_input.dt != DNN_FLOAT && model_input.dt !=
> > DNN_UINT8) {
> > > +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with
> > input
> > > data type as float32 and uint8.\n");
> > > +            return AVERROR(EIO);
> > > +        }
> > > +    } else if (ctx->fmt == AV_PIX_FMT_GRAY8) {
> > > +        if (model_input.channels != 1) {
> > > +            av_log(ctx, AV_LOG_ERROR, "channel number 1 is required,
> > but
> > > the actual channel number is %d\n",
> > > +                                       model_input.channels);
> > > +            return AVERROR(EIO);
> > > +        }
> > > +        if (model_input.dt != DNN_UINT8) {
> > > +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with
> > input
> > > data type as uint8.\n");
> > > +            return AVERROR(EIO);
> > > +        }
> > > +    } else if (ctx->fmt == AV_PIX_FMT_GRAYF32) {
> > > +        if (model_input.channels != 1) {
> > > +            av_log(ctx, AV_LOG_ERROR, "channel number 1 is required,
> > but
> > > the actual channel number is %d\n",
> > > +                                       model_input.channels);
> > > +            return AVERROR(EIO);
> > > +        }
> > > +        if (model_input.dt != DNN_FLOAT) {
> > > +            av_log(ctx, AV_LOG_ERROR, "only support dnn models with
> > input
> > > data type as float.\n");
> > > +            return AVERROR(EIO);
> > > +        }
> > > +    } else {
> > > +        av_assert0(!"should not reach here.");
> > >      }
> > >
> > General comment on the above and following chained ifs testing pixel
> > formats, personally,  using switch(fmt) seems more readable.
>
> good point, I'll refine the code.
Guo, Yejun Dec. 16, 2019, 11:18 a.m. UTC | #4
> -----Original Message-----

> From: Pedro Arthur [mailto:bygrandao@gmail.com]

> Sent: Friday, December 13, 2019 10:40 PM

> To: Guo, Yejun <yejun.guo@intel.com>

> Cc: ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add

> format GRAY8 and GRAYF32 support

> 

> Em sex., 13 de dez. de 2019 às 08:23, Guo, Yejun <yejun.guo@intel.com>

> escreveu:

> >

> > > From: Pedro Arthur [mailto:bygrandao@gmail.com]

> > > Sent: Friday, December 13, 2019 12:45 AM

> > > To: FFmpeg development discussions and patches

> <ffmpeg-devel@ffmpeg.org>

> > > Cc: Guo, Yejun <yejun.guo@intel.com>

> > > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add

> > > format GRAY8 and GRAYF32 support

> > > Hi,

> > >

> > > how should I test this patch?

> >

> > the fourth patch of this patch set is the fate test for this feature, so I ignored

> comments here.

> > I'll add the test descriptions back in v2.

> >

> > >

> > > Em sex., 22 de nov. de 2019 às 04:57, Guo, Yejun <yejun.guo at intel.com>

> > > escreveu:

> > >

> > > > Signed-off-by: Guo, Yejun <yejun.guo at intel.com>

> > > > ---

> > > >  doc/filters.texi                |   8 ++-

> > > >  libavfilter/vf_dnn_processing.c | 147

> > > > ++++++++++++++++++++++++++++++----------

> > > >  2 files changed, 118 insertions(+), 37 deletions(-)

> > > >

> > > > diff --git a/doc/filters.texi b/doc/filters.texi

> > > > index 1f86ae1..c3f7997 100644

> > > > --- a/doc/filters.texi

> > > > +++ b/doc/filters.texi

> > > > @@ -8992,7 +8992,13 @@ Set the input name of the dnn network.

> > > >  Set the output name of the dnn network.

> > > >

> > > >  @item fmt

> > > > -Set the pixel format for the Frame. Allowed values are

> > > > @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.

> > > > +Set the pixel format for the Frame, the value is determined by the input

> > > > of the dnn network model.

> > > >

> > > This sentence is a bit confusing, also I think this property should be

> > > removed. (I will explain bellow).

> >

> > sure, no problem.

> >

> > >

> > > +

> > > > +If the model handles RGB (or BGR) image and the data type of model

> input

> > > > is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or

> > > @code{AV_PIX_FMT_BGR24}.

> > > > +If the model handles RGB (or BGR) image and the data type of model

> input

> > > > is float, fmt must be @code{AV_PIX_FMT_RGB24} (or

> > > @code{AV_PIX_FMT_BGR24},

> > > > and this filter will do data type conversion internally.

> > > > +If the model handles GRAY image and the data type of model input is

> > > > uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.

> > > > +If the model handles GRAY image and the data type of model input is

> > > > float, fmt must be @code{AV_PIX_FMT_GRAYF32}.

> > > > +

> > > >  Default value is @code{AV_PIX_FMT_RGB24}.

> > > >

> > > >  @end table

> > > > diff --git a/libavfilter/vf_dnn_processing.c

> > > > b/libavfilter/vf_dnn_processing.c

> > > > index ce976ec..963dd5e 100644

> > > > --- a/libavfilter/vf_dnn_processing.c

> > > > +++ b/libavfilter/vf_dnn_processing.c

> > > > @@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext *context)

> > > >  {

> > > >      DnnProcessingContext *ctx = context->priv;

> > > >      int supported = 0;

> > > > -    // as the first step, only rgb24 and bgr24 are supported

> > > > +    // to support more formats

> > > >      const enum AVPixelFormat supported_pixel_fmts[] = {

> > > >          AV_PIX_FMT_RGB24,

> > > >          AV_PIX_FMT_BGR24,

> > > > +        AV_PIX_FMT_GRAY8,

> > > > +        AV_PIX_FMT_GRAYF32,

> > > >      };

> > > >      for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum

> > > > AVPixelFormat); ++i) {

> > > >          if (supported_pixel_fmts[i] == ctx->fmt) {

> > > > @@ -156,14 +158,38 @@ static int config_input(AVFilterLink *inlink)

> > > >          return AVERROR(EIO);

> > > >      }

> > > >

> > > I think the filter should not check formats manually in the init function

> > > (unless I'm missing something), it would be best if you query for all the

> > > above supported formats in query_formats and later in config_input you

> make

> > > sure the expected model format matches the frame format.

> >

> > I'm afraid it is too late if we find the format mismatch in function

> config_input.

> >

> > For example, the dnn module is designed to accept BGR24 data, and the

> actual

> > format comes into config_input is RGB24 or YUV420P (we'll add yuv formats

> later in

> > supported pixel fmts) or something else such as GRAY8. We have two choices:

> >

> > - return error, and the application ends.

> >   This is not what we want.

> > - return no_error, and do format conversion at the beginning of function

> filter_frame.

> >   It makes this filter complex, and our implementation for the conversion

> might not be the best optimized.

> >   My idea is to keep this filter simple. And the users can choose what they

> want to do conversion in the filters graph.

> >

> Indeed if the filter receives the format already converted is much

> better, but if you already have to specify the format the model

> expects as a parameter one could simply use the -pix_fmt to set the

> format instead of having one more filter parameter.

> Is there any downside if it uses "-pix_fmt" that "fmt" avoids?


I see, your meaning is to add a format conversion filter explicitly before dnn_processing.

so, there are two options.
1). no parameter 'fmt' for dnn_processing
We must specify the format filter (or other filters converting format) explicitly, the command line looks like:
-vf "filter0, filter1, ..., format=pix_fmts=rgb24, dnn_processing=model=my_model_file, ..."

The advantage is dnn_processing is simpler without 'fmt'.
The disadvantage is we need to change parameters of two filters (format and dnn_processing) when a new model with different input is used.


2). dnn_processing has parameter 'fmt'.
This option supports two styles of command line:
a) -vf "filter0, filter1, ..., dnn_processing=model=my_model_file:fmt=rgb24, ..."
The filter 'format=pix_fmts=rgb24' is automatically inserted by ffmpeg, so we just need to change the parameters
of one filter (dnn_processing) when a new model with different input is used.

b) -vf "filter0, filter1, ..., format=pix_fmts=rgb24, dnn_processing=model=my_model_file:fmt=rgb24, ..."
It is redundant for the rgb24.


Another point is that logically the format is tied closely with the input of the model, I think
it's more nature to put them together in one filter, so I prefer option 2). Anyway, each option has
advantages and disadvantages, I do not find an ideal one and not insist on option 2).


> 

> > Regarding the below description in doc/filters.texi, the reason is that we do

> not have format such as rgbf32 and bgrf32, we have to do conversion within

> this filter.

> > "If the model handles RGB (or BGR) image and the data type of model input is

> float, fmt must be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX

> > _FMT_BGR24}, and this filter will do data type conversion internally."

> >

> >

> > Consider the filter vf_dnn_analytic in my plan, I think the conversion is

> necessary since mostly a scale down is needed.

> >
Jun Zhao Dec. 16, 2019, 11:42 a.m. UTC | #5
On Mon, Dec 16, 2019 at 7:18 PM Guo, Yejun <yejun.guo@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Pedro Arthur [mailto:bygrandao@gmail.com]
> > Sent: Friday, December 13, 2019 10:40 PM
> > To: Guo, Yejun <yejun.guo@intel.com>
> > Cc: ffmpeg-devel@ffmpeg.org
> > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add
> > format GRAY8 and GRAYF32 support
> >
> > Em sex., 13 de dez. de 2019 às 08:23, Guo, Yejun <yejun.guo@intel.com>
> > escreveu:
> > >
> > > > From: Pedro Arthur [mailto:bygrandao@gmail.com]
> > > > Sent: Friday, December 13, 2019 12:45 AM
> > > > To: FFmpeg development discussions and patches
> > <ffmpeg-devel@ffmpeg.org>
> > > > Cc: Guo, Yejun <yejun.guo@intel.com>
> > > > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add
> > > > format GRAY8 and GRAYF32 support
> > > > Hi,
> > > >
> > > > how should I test this patch?
> > >
> > > the fourth patch of this patch set is the fate test for this feature, so I ignored
> > comments here.
> > > I'll add the test descriptions back in v2.
> > >
> > > >
> > > > Em sex., 22 de nov. de 2019 às 04:57, Guo, Yejun <yejun.guo at intel.com>
> > > > escreveu:
> > > >
> > > > > Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> > > > > ---
> > > > >  doc/filters.texi                |   8 ++-
> > > > >  libavfilter/vf_dnn_processing.c | 147
> > > > > ++++++++++++++++++++++++++++++----------
> > > > >  2 files changed, 118 insertions(+), 37 deletions(-)
> > > > >
> > > > > diff --git a/doc/filters.texi b/doc/filters.texi
> > > > > index 1f86ae1..c3f7997 100644
> > > > > --- a/doc/filters.texi
> > > > > +++ b/doc/filters.texi
> > > > > @@ -8992,7 +8992,13 @@ Set the input name of the dnn network.
> > > > >  Set the output name of the dnn network.
> > > > >
> > > > >  @item fmt
> > > > > -Set the pixel format for the Frame. Allowed values are
> > > > > @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.
> > > > > +Set the pixel format for the Frame, the value is determined by the input
> > > > > of the dnn network model.
> > > > >
> > > > This sentence is a bit confusing, also I think this property should be
> > > > removed. (I will explain bellow).
> > >
> > > sure, no problem.
> > >
> > > >
> > > > +
> > > > > +If the model handles RGB (or BGR) image and the data type of model
> > input
> > > > > is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or
> > > > @code{AV_PIX_FMT_BGR24}.
> > > > > +If the model handles RGB (or BGR) image and the data type of model
> > input
> > > > > is float, fmt must be @code{AV_PIX_FMT_RGB24} (or
> > > > @code{AV_PIX_FMT_BGR24},
> > > > > and this filter will do data type conversion internally.
> > > > > +If the model handles GRAY image and the data type of model input is
> > > > > uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.
> > > > > +If the model handles GRAY image and the data type of model input is
> > > > > float, fmt must be @code{AV_PIX_FMT_GRAYF32}.
> > > > > +
> > > > >  Default value is @code{AV_PIX_FMT_RGB24}.
> > > > >
> > > > >  @end table
> > > > > diff --git a/libavfilter/vf_dnn_processing.c
> > > > > b/libavfilter/vf_dnn_processing.c
> > > > > index ce976ec..963dd5e 100644
> > > > > --- a/libavfilter/vf_dnn_processing.c
> > > > > +++ b/libavfilter/vf_dnn_processing.c
> > > > > @@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext *context)
> > > > >  {
> > > > >      DnnProcessingContext *ctx = context->priv;
> > > > >      int supported = 0;
> > > > > -    // as the first step, only rgb24 and bgr24 are supported
> > > > > +    // to support more formats
> > > > >      const enum AVPixelFormat supported_pixel_fmts[] = {
> > > > >          AV_PIX_FMT_RGB24,
> > > > >          AV_PIX_FMT_BGR24,
> > > > > +        AV_PIX_FMT_GRAY8,
> > > > > +        AV_PIX_FMT_GRAYF32,
> > > > >      };
> > > > >      for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum
> > > > > AVPixelFormat); ++i) {
> > > > >          if (supported_pixel_fmts[i] == ctx->fmt) {
> > > > > @@ -156,14 +158,38 @@ static int config_input(AVFilterLink *inlink)
> > > > >          return AVERROR(EIO);
> > > > >      }
> > > > >
> > > > I think the filter should not check formats manually in the init function
> > > > (unless I'm missing something), it would be best if you query for all the
> > > > above supported formats in query_formats and later in config_input you
> > make
> > > > sure the expected model format matches the frame format.
> > >
> > > I'm afraid it is too late if we find the format mismatch in function
> > config_input.
> > >
> > > For example, the dnn module is designed to accept BGR24 data, and the
> > actual
> > > format comes into config_input is RGB24 or YUV420P (we'll add yuv formats
> > later in
> > > supported pixel fmts) or something else such as GRAY8. We have two choices:
> > >
> > > - return error, and the application ends.
> > >   This is not what we want.
> > > - return no_error, and do format conversion at the beginning of function
> > filter_frame.
> > >   It makes this filter complex, and our implementation for the conversion
> > might not be the best optimized.
> > >   My idea is to keep this filter simple. And the users can choose what they
> > want to do conversion in the filters graph.
> > >
> > Indeed if the filter receives the format already converted is much
> > better, but if you already have to specify the format the model
> > expects as a parameter one could simply use the -pix_fmt to set the
> > format instead of having one more filter parameter.
> > Is there any downside if it uses "-pix_fmt" that "fmt" avoids?
>
> I see, your meaning is to add a format conversion filter explicitly before dnn_processing.
>
> so, there are two options.
> 1). no parameter 'fmt' for dnn_processing
> We must specify the format filter (or other filters converting format) explicitly, the command line looks like:
> -vf "filter0, filter1, ..., format=pix_fmts=rgb24, dnn_processing=model=my_model_file, ..."
>
> The advantage is dnn_processing is simpler without 'fmt'.
> The disadvantage is we need to change parameters of two filters (format and dnn_processing) when a new model with different input is used.
>
>
> 2). dnn_processing has parameter 'fmt'.
> This option supports two styles of command line:
> a) -vf "filter0, filter1, ..., dnn_processing=model=my_model_file:fmt=rgb24, ..."
> The filter 'format=pix_fmts=rgb24' is automatically inserted by ffmpeg, so we just need to change the parameters
> of one filter (dnn_processing) when a new model with different input is used.
>
> b) -vf "filter0, filter1, ..., format=pix_fmts=rgb24, dnn_processing=model=my_model_file:fmt=rgb24, ..."
> It is redundant for the rgb24.
>
>
> Another point is that logically the format is tied closely with the input of the model, I think
> it's more nature to put them together in one filter, so I prefer option 2). Anyway, each option has
> advantages and disadvantages, I do not find an ideal one and not insist on option 2).
>
>

Based on the flexibility, I prefer option 1
Guo, Yejun Dec. 16, 2019, 12:39 p.m. UTC | #6
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of

> mypopy@gmail.com

> Sent: Monday, December 16, 2019 7:43 PM

> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>

> Cc: Pedro Arthur <bygrandao@gmail.com>

> Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add

> format GRAY8 and GRAYF32 support

> 

> On Mon, Dec 16, 2019 at 7:18 PM Guo, Yejun <yejun.guo@intel.com> wrote:

> >

> >

> >

> > > -----Original Message-----

> > > From: Pedro Arthur [mailto:bygrandao@gmail.com]

> > > Sent: Friday, December 13, 2019 10:40 PM

> > > To: Guo, Yejun <yejun.guo@intel.com>

> > > Cc: ffmpeg-devel@ffmpeg.org

> > > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add

> > > format GRAY8 and GRAYF32 support

> > >

> > > Em sex., 13 de dez. de 2019 às 08:23, Guo, Yejun <yejun.guo@intel.com>

> > > escreveu:

> > > >

> > > > > From: Pedro Arthur [mailto:bygrandao@gmail.com]

> > > > > Sent: Friday, December 13, 2019 12:45 AM

> > > > > To: FFmpeg development discussions and patches

> > > <ffmpeg-devel@ffmpeg.org>

> > > > > Cc: Guo, Yejun <yejun.guo@intel.com>

> > > > > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add

> > > > > format GRAY8 and GRAYF32 support

> > > > > Hi,

> > > > >

> > > > > how should I test this patch?

> > > >

> > > > the fourth patch of this patch set is the fate test for this feature, so I

> ignored

> > > comments here.

> > > > I'll add the test descriptions back in v2.

> > > >

> > > > >

> > > > > Em sex., 22 de nov. de 2019 às 04:57, Guo, Yejun <yejun.guo at

> intel.com>

> > > > > escreveu:

> > > > >

> > > > > > Signed-off-by: Guo, Yejun <yejun.guo at intel.com>

> > > > > > ---

> > > > > >  doc/filters.texi                |   8 ++-

> > > > > >  libavfilter/vf_dnn_processing.c | 147

> > > > > > ++++++++++++++++++++++++++++++----------

> > > > > >  2 files changed, 118 insertions(+), 37 deletions(-)

> > > > > >

> > > > > > diff --git a/doc/filters.texi b/doc/filters.texi

> > > > > > index 1f86ae1..c3f7997 100644

> > > > > > --- a/doc/filters.texi

> > > > > > +++ b/doc/filters.texi

> > > > > > @@ -8992,7 +8992,13 @@ Set the input name of the dnn network.

> > > > > >  Set the output name of the dnn network.

> > > > > >

> > > > > >  @item fmt

> > > > > > -Set the pixel format for the Frame. Allowed values are

> > > > > > @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.

> > > > > > +Set the pixel format for the Frame, the value is determined by the

> input

> > > > > > of the dnn network model.

> > > > > >

> > > > > This sentence is a bit confusing, also I think this property should be

> > > > > removed. (I will explain bellow).

> > > >

> > > > sure, no problem.

> > > >

> > > > >

> > > > > +

> > > > > > +If the model handles RGB (or BGR) image and the data type of model

> > > input

> > > > > > is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or

> > > > > @code{AV_PIX_FMT_BGR24}.

> > > > > > +If the model handles RGB (or BGR) image and the data type of model

> > > input

> > > > > > is float, fmt must be @code{AV_PIX_FMT_RGB24} (or

> > > > > @code{AV_PIX_FMT_BGR24},

> > > > > > and this filter will do data type conversion internally.

> > > > > > +If the model handles GRAY image and the data type of model input is

> > > > > > uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.

> > > > > > +If the model handles GRAY image and the data type of model input is

> > > > > > float, fmt must be @code{AV_PIX_FMT_GRAYF32}.

> > > > > > +

> > > > > >  Default value is @code{AV_PIX_FMT_RGB24}.

> > > > > >

> > > > > >  @end table

> > > > > > diff --git a/libavfilter/vf_dnn_processing.c

> > > > > > b/libavfilter/vf_dnn_processing.c

> > > > > > index ce976ec..963dd5e 100644

> > > > > > --- a/libavfilter/vf_dnn_processing.c

> > > > > > +++ b/libavfilter/vf_dnn_processing.c

> > > > > > @@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext

> *context)

> > > > > >  {

> > > > > >      DnnProcessingContext *ctx = context->priv;

> > > > > >      int supported = 0;

> > > > > > -    // as the first step, only rgb24 and bgr24 are supported

> > > > > > +    // to support more formats

> > > > > >      const enum AVPixelFormat supported_pixel_fmts[] = {

> > > > > >          AV_PIX_FMT_RGB24,

> > > > > >          AV_PIX_FMT_BGR24,

> > > > > > +        AV_PIX_FMT_GRAY8,

> > > > > > +        AV_PIX_FMT_GRAYF32,

> > > > > >      };

> > > > > >      for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum

> > > > > > AVPixelFormat); ++i) {

> > > > > >          if (supported_pixel_fmts[i] == ctx->fmt) {

> > > > > > @@ -156,14 +158,38 @@ static int config_input(AVFilterLink *inlink)

> > > > > >          return AVERROR(EIO);

> > > > > >      }

> > > > > >

> > > > > I think the filter should not check formats manually in the init function

> > > > > (unless I'm missing something), it would be best if you query for all the

> > > > > above supported formats in query_formats and later in config_input you

> > > make

> > > > > sure the expected model format matches the frame format.

> > > >

> > > > I'm afraid it is too late if we find the format mismatch in function

> > > config_input.

> > > >

> > > > For example, the dnn module is designed to accept BGR24 data, and the

> > > actual

> > > > format comes into config_input is RGB24 or YUV420P (we'll add yuv

> formats

> > > later in

> > > > supported pixel fmts) or something else such as GRAY8. We have two

> choices:

> > > >

> > > > - return error, and the application ends.

> > > >   This is not what we want.

> > > > - return no_error, and do format conversion at the beginning of function

> > > filter_frame.

> > > >   It makes this filter complex, and our implementation for the conversion

> > > might not be the best optimized.

> > > >   My idea is to keep this filter simple. And the users can choose what

> they

> > > want to do conversion in the filters graph.

> > > >

> > > Indeed if the filter receives the format already converted is much

> > > better, but if you already have to specify the format the model

> > > expects as a parameter one could simply use the -pix_fmt to set the

> > > format instead of having one more filter parameter.

> > > Is there any downside if it uses "-pix_fmt" that "fmt" avoids?

> >

> > I see, your meaning is to add a format conversion filter explicitly before

> dnn_processing.

> >

> > so, there are two options.

> > 1). no parameter 'fmt' for dnn_processing

> > We must specify the format filter (or other filters converting format) explicitly,

> the command line looks like:

> > -vf "filter0, filter1, ..., format=pix_fmts=rgb24,

> dnn_processing=model=my_model_file, ..."

> >

> > The advantage is dnn_processing is simpler without 'fmt'.

> > The disadvantage is we need to change parameters of two filters (format and

> dnn_processing) when a new model with different input is used.

> >

> >

> > 2). dnn_processing has parameter 'fmt'.

> > This option supports two styles of command line:

> > a) -vf "filter0, filter1, ...,

> dnn_processing=model=my_model_file:fmt=rgb24, ..."

> > The filter 'format=pix_fmts=rgb24' is automatically inserted by ffmpeg, so we

> just need to change the parameters

> > of one filter (dnn_processing) when a new model with different input is used.

> >

> > b) -vf "filter0, filter1, ..., format=pix_fmts=rgb24,

> dnn_processing=model=my_model_file:fmt=rgb24, ..."

> > It is redundant for the rgb24.

> >

> >

> > Another point is that logically the format is tied closely with the input of the

> model, I think

> > it's more nature to put them together in one filter, so I prefer option 2).

> Anyway, each option has

> > advantages and disadvantages, I do not find an ideal one and not insist on

> option 2).

> >

> >

> 

> Based on the flexibility, I prefer option 1


thanks for the discussion, I might not explain the background well. 
For a given dnn model, the fmt is determined. 'fmt' has two meanings, the format of the Frame and also denotes the input format of dnn model.

I think the advantage of option 1 is simpler, but the disadvantage of option 1 is no flexible since
we need to change parameters of two filters when a new model with different input is used.

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

> 

> To unsubscribe, visit link above, or email

> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Pedro Arthur Dec. 16, 2019, 2:14 p.m. UTC | #7
Em seg., 16 de dez. de 2019 às 09:39, Guo, Yejun <yejun.guo@intel.com> escreveu:
>
>
>
> > -----Original Message-----
> > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of
> > mypopy@gmail.com
> > Sent: Monday, December 16, 2019 7:43 PM
> > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> > Cc: Pedro Arthur <bygrandao@gmail.com>
> > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add
> > format GRAY8 and GRAYF32 support
> >
> > On Mon, Dec 16, 2019 at 7:18 PM Guo, Yejun <yejun.guo@intel.com> wrote:
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Pedro Arthur [mailto:bygrandao@gmail.com]
> > > > Sent: Friday, December 13, 2019 10:40 PM
> > > > To: Guo, Yejun <yejun.guo@intel.com>
> > > > Cc: ffmpeg-devel@ffmpeg.org
> > > > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add
> > > > format GRAY8 and GRAYF32 support
> > > >
> > > > Em sex., 13 de dez. de 2019 às 08:23, Guo, Yejun <yejun.guo@intel.com>
> > > > escreveu:
> > > > >
> > > > > > From: Pedro Arthur [mailto:bygrandao@gmail.com]
> > > > > > Sent: Friday, December 13, 2019 12:45 AM
> > > > > > To: FFmpeg development discussions and patches
> > > > <ffmpeg-devel@ffmpeg.org>
> > > > > > Cc: Guo, Yejun <yejun.guo@intel.com>
> > > > > > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add
> > > > > > format GRAY8 and GRAYF32 support
> > > > > > Hi,
> > > > > >
> > > > > > how should I test this patch?
> > > > >
> > > > > the fourth patch of this patch set is the fate test for this feature, so I
> > ignored
> > > > comments here.
> > > > > I'll add the test descriptions back in v2.
> > > > >
> > > > > >
> > > > > > Em sex., 22 de nov. de 2019 às 04:57, Guo, Yejun <yejun.guo at
> > intel.com>
> > > > > > escreveu:
> > > > > >
> > > > > > > Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> > > > > > > ---
> > > > > > >  doc/filters.texi                |   8 ++-
> > > > > > >  libavfilter/vf_dnn_processing.c | 147
> > > > > > > ++++++++++++++++++++++++++++++----------
> > > > > > >  2 files changed, 118 insertions(+), 37 deletions(-)
> > > > > > >
> > > > > > > diff --git a/doc/filters.texi b/doc/filters.texi
> > > > > > > index 1f86ae1..c3f7997 100644
> > > > > > > --- a/doc/filters.texi
> > > > > > > +++ b/doc/filters.texi
> > > > > > > @@ -8992,7 +8992,13 @@ Set the input name of the dnn network.
> > > > > > >  Set the output name of the dnn network.
> > > > > > >
> > > > > > >  @item fmt
> > > > > > > -Set the pixel format for the Frame. Allowed values are
> > > > > > > @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.
> > > > > > > +Set the pixel format for the Frame, the value is determined by the
> > input
> > > > > > > of the dnn network model.
> > > > > > >
> > > > > > This sentence is a bit confusing, also I think this property should be
> > > > > > removed. (I will explain bellow).
> > > > >
> > > > > sure, no problem.
> > > > >
> > > > > >
> > > > > > +
> > > > > > > +If the model handles RGB (or BGR) image and the data type of model
> > > > input
> > > > > > > is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or
> > > > > > @code{AV_PIX_FMT_BGR24}.
> > > > > > > +If the model handles RGB (or BGR) image and the data type of model
> > > > input
> > > > > > > is float, fmt must be @code{AV_PIX_FMT_RGB24} (or
> > > > > > @code{AV_PIX_FMT_BGR24},
> > > > > > > and this filter will do data type conversion internally.
> > > > > > > +If the model handles GRAY image and the data type of model input is
> > > > > > > uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.
> > > > > > > +If the model handles GRAY image and the data type of model input is
> > > > > > > float, fmt must be @code{AV_PIX_FMT_GRAYF32}.
> > > > > > > +
> > > > > > >  Default value is @code{AV_PIX_FMT_RGB24}.
> > > > > > >
> > > > > > >  @end table
> > > > > > > diff --git a/libavfilter/vf_dnn_processing.c
> > > > > > > b/libavfilter/vf_dnn_processing.c
> > > > > > > index ce976ec..963dd5e 100644
> > > > > > > --- a/libavfilter/vf_dnn_processing.c
> > > > > > > +++ b/libavfilter/vf_dnn_processing.c
> > > > > > > @@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext
> > *context)
> > > > > > >  {
> > > > > > >      DnnProcessingContext *ctx = context->priv;
> > > > > > >      int supported = 0;
> > > > > > > -    // as the first step, only rgb24 and bgr24 are supported
> > > > > > > +    // to support more formats
> > > > > > >      const enum AVPixelFormat supported_pixel_fmts[] = {
> > > > > > >          AV_PIX_FMT_RGB24,
> > > > > > >          AV_PIX_FMT_BGR24,
> > > > > > > +        AV_PIX_FMT_GRAY8,
> > > > > > > +        AV_PIX_FMT_GRAYF32,
> > > > > > >      };
> > > > > > >      for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum
> > > > > > > AVPixelFormat); ++i) {
> > > > > > >          if (supported_pixel_fmts[i] == ctx->fmt) {
> > > > > > > @@ -156,14 +158,38 @@ static int config_input(AVFilterLink *inlink)
> > > > > > >          return AVERROR(EIO);
> > > > > > >      }
> > > > > > >
> > > > > > I think the filter should not check formats manually in the init function
> > > > > > (unless I'm missing something), it would be best if you query for all the
> > > > > > above supported formats in query_formats and later in config_input you
> > > > make
> > > > > > sure the expected model format matches the frame format.
> > > > >
> > > > > I'm afraid it is too late if we find the format mismatch in function
> > > > config_input.
> > > > >
> > > > > For example, the dnn module is designed to accept BGR24 data, and the
> > > > actual
> > > > > format comes into config_input is RGB24 or YUV420P (we'll add yuv
> > formats
> > > > later in
> > > > > supported pixel fmts) or something else such as GRAY8. We have two
> > choices:
> > > > >
> > > > > - return error, and the application ends.
> > > > >   This is not what we want.
> > > > > - return no_error, and do format conversion at the beginning of function
> > > > filter_frame.
> > > > >   It makes this filter complex, and our implementation for the conversion
> > > > might not be the best optimized.
> > > > >   My idea is to keep this filter simple. And the users can choose what
> > they
> > > > want to do conversion in the filters graph.
> > > > >
> > > > Indeed if the filter receives the format already converted is much
> > > > better, but if you already have to specify the format the model
> > > > expects as a parameter one could simply use the -pix_fmt to set the
> > > > format instead of having one more filter parameter.
> > > > Is there any downside if it uses "-pix_fmt" that "fmt" avoids?
> > >
> > > I see, your meaning is to add a format conversion filter explicitly before
> > dnn_processing.
> > >
> > > so, there are two options.
> > > 1). no parameter 'fmt' for dnn_processing
> > > We must specify the format filter (or other filters converting format) explicitly,
> > the command line looks like:
> > > -vf "filter0, filter1, ..., format=pix_fmts=rgb24,
> > dnn_processing=model=my_model_file, ..."
> > >
> > > The advantage is dnn_processing is simpler without 'fmt'.
> > > The disadvantage is we need to change parameters of two filters (format and
> > dnn_processing) when a new model with different input is used.
> > >
> > >
> > > 2). dnn_processing has parameter 'fmt'.
> > > This option supports two styles of command line:
> > > a) -vf "filter0, filter1, ...,
> > dnn_processing=model=my_model_file:fmt=rgb24, ..."
> > > The filter 'format=pix_fmts=rgb24' is automatically inserted by ffmpeg, so we
> > just need to change the parameters
> > > of one filter (dnn_processing) when a new model with different input is used.
> > >
> > > b) -vf "filter0, filter1, ..., format=pix_fmts=rgb24,
> > dnn_processing=model=my_model_file:fmt=rgb24, ..."
> > > It is redundant for the rgb24.
> > >
> > >
> > > Another point is that logically the format is tied closely with the input of the
> > model, I think
> > > it's more nature to put them together in one filter, so I prefer option 2).
> > Anyway, each option has
> > > advantages and disadvantages, I do not find an ideal one and not insist on
> > option 2).
> > >
> > >
> >
> > Based on the flexibility, I prefer option 1
>
> thanks for the discussion, I might not explain the background well.
> For a given dnn model, the fmt is determined. 'fmt' has two meanings, the format of the Frame and also denotes the input format of dnn model.
I'm not 100% sure about what you mean, but if the model determines the
expected format, it should be present in the model file and require no
manual setting by the user, at least for our own model file format.

>
> I think the advantage of option 1 is simpler, but the disadvantage of option 1 is no flexible since
> we need to change parameters of two filters when a new model with different input is used.
I agree we have one more filter with option 1 but it seems a more
standard (and direct) way of setting formats. If it can achieve the
same features as option 2 I would still prefer option 1.
If you have a specific use case which would be penalized by using
option 1 let us know.

>
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Guo, Yejun Dec. 17, 2019, 7:05 a.m. UTC | #8
> > > > > >

> > > > > > > From: Pedro Arthur [mailto:bygrandao@gmail.com]

> > > > > > > Sent: Friday, December 13, 2019 12:45 AM

> > > > > > > To: FFmpeg development discussions and patches

> > > > > <ffmpeg-devel@ffmpeg.org>

> > > > > > > Cc: Guo, Yejun <yejun.guo@intel.com>

> > > > > > > Subject: Re: [FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing:

> add

> > > > > > > format GRAY8 and GRAYF32 support

> > > > > > > Hi,

> > > > > > >

> > > > > > > how should I test this patch?

> > > > > >

> > > > > > the fourth patch of this patch set is the fate test for this feature, so I

> > > ignored

> > > > > comments here.

> > > > > > I'll add the test descriptions back in v2.

> > > > > >

> > > > > > >

> > > > > > > Em sex., 22 de nov. de 2019 às 04:57, Guo, Yejun <yejun.guo at

> > > intel.com>

> > > > > > > escreveu:

> > > > > > >

> > > > > > > > Signed-off-by: Guo, Yejun <yejun.guo at intel.com>

> > > > > > > > ---

> > > > > > > >  doc/filters.texi                |   8 ++-

> > > > > > > >  libavfilter/vf_dnn_processing.c | 147

> > > > > > > > ++++++++++++++++++++++++++++++----------

> > > > > > > >  2 files changed, 118 insertions(+), 37 deletions(-)

> > > > > > > >

> > > > > > > > diff --git a/doc/filters.texi b/doc/filters.texi

> > > > > > > > index 1f86ae1..c3f7997 100644

> > > > > > > > --- a/doc/filters.texi

> > > > > > > > +++ b/doc/filters.texi

> > > > > > > > @@ -8992,7 +8992,13 @@ Set the input name of the dnn

> network.

> > > > > > > >  Set the output name of the dnn network.

> > > > > > > >

> > > > > > > >  @item fmt

> > > > > > > > -Set the pixel format for the Frame. Allowed values are

> > > > > > > > @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.

> > > > > > > > +Set the pixel format for the Frame, the value is determined by the

> > > input

> > > > > > > > of the dnn network model.

> > > > > > > >

> > > > > > > This sentence is a bit confusing, also I think this property should be

> > > > > > > removed. (I will explain bellow).

> > > > > >

> > > > > > sure, no problem.

> > > > > >

> > > > > > >

> > > > > > > +

> > > > > > > > +If the model handles RGB (or BGR) image and the data type of

> model

> > > > > input

> > > > > > > > is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or

> > > > > > > @code{AV_PIX_FMT_BGR24}.

> > > > > > > > +If the model handles RGB (or BGR) image and the data type of

> model

> > > > > input

> > > > > > > > is float, fmt must be @code{AV_PIX_FMT_RGB24} (or

> > > > > > > @code{AV_PIX_FMT_BGR24},

> > > > > > > > and this filter will do data type conversion internally.

> > > > > > > > +If the model handles GRAY image and the data type of model

> input is

> > > > > > > > uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.

> > > > > > > > +If the model handles GRAY image and the data type of model

> input is

> > > > > > > > float, fmt must be @code{AV_PIX_FMT_GRAYF32}.

> > > > > > > > +

> > > > > > > >  Default value is @code{AV_PIX_FMT_RGB24}.

> > > > > > > >

> > > > > > > >  @end table

> > > > > > > > diff --git a/libavfilter/vf_dnn_processing.c

> > > > > > > > b/libavfilter/vf_dnn_processing.c

> > > > > > > > index ce976ec..963dd5e 100644

> > > > > > > > --- a/libavfilter/vf_dnn_processing.c

> > > > > > > > +++ b/libavfilter/vf_dnn_processing.c

> > > > > > > > @@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext

> > > *context)

> > > > > > > >  {

> > > > > > > >      DnnProcessingContext *ctx = context->priv;

> > > > > > > >      int supported = 0;

> > > > > > > > -    // as the first step, only rgb24 and bgr24 are supported

> > > > > > > > +    // to support more formats

> > > > > > > >      const enum AVPixelFormat supported_pixel_fmts[] = {

> > > > > > > >          AV_PIX_FMT_RGB24,

> > > > > > > >          AV_PIX_FMT_BGR24,

> > > > > > > > +        AV_PIX_FMT_GRAY8,

> > > > > > > > +        AV_PIX_FMT_GRAYF32,

> > > > > > > >      };

> > > > > > > >      for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum

> > > > > > > > AVPixelFormat); ++i) {

> > > > > > > >          if (supported_pixel_fmts[i] == ctx->fmt) {

> > > > > > > > @@ -156,14 +158,38 @@ static int config_input(AVFilterLink

> *inlink)

> > > > > > > >          return AVERROR(EIO);

> > > > > > > >      }

> > > > > > > >

> > > > > > > I think the filter should not check formats manually in the init

> function

> > > > > > > (unless I'm missing something), it would be best if you query for all

> the

> > > > > > > above supported formats in query_formats and later in config_input

> you

> > > > > make

> > > > > > > sure the expected model format matches the frame format.

> > > > > >

> > > > > > I'm afraid it is too late if we find the format mismatch in function

> > > > > config_input.

> > > > > >

> > > > > > For example, the dnn module is designed to accept BGR24 data, and

> the

> > > > > actual

> > > > > > format comes into config_input is RGB24 or YUV420P (we'll add yuv

> > > formats

> > > > > later in

> > > > > > supported pixel fmts) or something else such as GRAY8. We have two

> > > choices:

> > > > > >

> > > > > > - return error, and the application ends.

> > > > > >   This is not what we want.

> > > > > > - return no_error, and do format conversion at the beginning of

> function

> > > > > filter_frame.

> > > > > >   It makes this filter complex, and our implementation for the

> conversion

> > > > > might not be the best optimized.

> > > > > >   My idea is to keep this filter simple. And the users can choose what

> > > they

> > > > > want to do conversion in the filters graph.

> > > > > >

> > > > > Indeed if the filter receives the format already converted is much

> > > > > better, but if you already have to specify the format the model

> > > > > expects as a parameter one could simply use the -pix_fmt to set the

> > > > > format instead of having one more filter parameter.

> > > > > Is there any downside if it uses "-pix_fmt" that "fmt" avoids?

> > > >

> > > > I see, your meaning is to add a format conversion filter explicitly before

> > > dnn_processing.

> > > >

> > > > so, there are two options.

> > > > 1). no parameter 'fmt' for dnn_processing

> > > > We must specify the format filter (or other filters converting format)

> explicitly,

> > > the command line looks like:

> > > > -vf "filter0, filter1, ..., format=pix_fmts=rgb24,

> > > dnn_processing=model=my_model_file, ..."

> > > >

> > > > The advantage is dnn_processing is simpler without 'fmt'.

> > > > The disadvantage is we need to change parameters of two filters (format

> and

> > > dnn_processing) when a new model with different input is used.

> > > >

> > > >

> > > > 2). dnn_processing has parameter 'fmt'.

> > > > This option supports two styles of command line:

> > > > a) -vf "filter0, filter1, ...,

> > > dnn_processing=model=my_model_file:fmt=rgb24, ..."

> > > > The filter 'format=pix_fmts=rgb24' is automatically inserted by ffmpeg, so

> we

> > > just need to change the parameters

> > > > of one filter (dnn_processing) when a new model with different input is

> used.

> > > >

> > > > b) -vf "filter0, filter1, ..., format=pix_fmts=rgb24,

> > > dnn_processing=model=my_model_file:fmt=rgb24, ..."

> > > > It is redundant for the rgb24.

> > > >

> > > >

> > > > Another point is that logically the format is tied closely with the input of

> the

> > > model, I think

> > > > it's more nature to put them together in one filter, so I prefer option 2).

> > > Anyway, each option has

> > > > advantages and disadvantages, I do not find an ideal one and not insist on

> > > option 2).

> > > >

> > > >

> > >

> > > Based on the flexibility, I prefer option 1

> >

> > thanks for the discussion, I might not explain the background well.

> > For a given dnn model, the fmt is determined. 'fmt' has two meanings, the

> format of the Frame and also denotes the input format of dnn model.

> I'm not 100% sure about what you mean, but if the model determines the

> expected format, it should be present in the model file and require no

> manual setting by the user, at least for our own model file format.


yes, I agree your opinion that the format *should* be present in the model file.
But, I checked the model file of tensorflow, caffe, pytorch and openvino, such
format information is all missing. In other words, the model file contains the
channel number (3 for rgb, 1 for gray/Y, ...) and the data type (float32 or uint8, ...),
but it does not contain the meaning of the first channel is Red (RGB24 for example) or Blue (BGR24 for example)...
and it does not denote it requires gray data or Y value when channel number is 1.

This is the reason that OpenCV function (cv::dnn::blobFromImage) has a bool flag swapRB to denote
the base format is RGB or BGR, see more detail at https://docs.opencv.org/master/d6/d0f/group__dnn.html#ga29f34df9376379a603acd8df581ac8d7

We can add a command line parameter for python script generate.py to ask user to set the format
when executing the script to convert .pb file to .model file. I'll add into todo list with low priority.

> 

> >

> > I think the advantage of option 1 is simpler, but the disadvantage of option 1

> is no flexible since

> > we need to change parameters of two filters when a new model with

> different input is used.

> I agree we have one more filter with option 1 but it seems a more

> standard (and direct) way of setting formats. If it can achieve the

> same features as option 2 I would still prefer option 1.

> If you have a specific use case which would be penalized by using

> option 1 let us know.


I do not have such use case, let's change to option 1.

> 

> >

> > > _______________________________________________

> > > ffmpeg-devel mailing list

> > > ffmpeg-devel@ffmpeg.org

> > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

> > >

> > > To unsubscribe, visit link above, or email

> > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox

Patch

diff --git a/doc/filters.texi b/doc/filters.texi
index 1f86ae1..c3f7997 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -8992,7 +8992,13 @@  Set the input name of the dnn network.
 Set the output name of the dnn network.
 
 @item fmt
-Set the pixel format for the Frame. Allowed values are @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.
+Set the pixel format for the Frame, the value is determined by the input of the dnn network model.
+
+If the model handles RGB (or BGR) image and the data type of model input is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX_FMT_BGR24}.
+If the model handles RGB (or BGR) image and the data type of model input is float, fmt must be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX_FMT_BGR24}, and this filter will do data type conversion internally.
+If the model handles GRAY image and the data type of model input is uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.
+If the model handles GRAY image and the data type of model input is float, fmt must be @code{AV_PIX_FMT_GRAYF32}.
+
 Default value is @code{AV_PIX_FMT_RGB24}.
 
 @end table
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index ce976ec..963dd5e 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -70,10 +70,12 @@  static av_cold int init(AVFilterContext *context)
 {
     DnnProcessingContext *ctx = context->priv;
     int supported = 0;
-    // as the first step, only rgb24 and bgr24 are supported
+    // to support more formats
     const enum AVPixelFormat supported_pixel_fmts[] = {
         AV_PIX_FMT_RGB24,
         AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_GRAYF32,
     };
     for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum AVPixelFormat); ++i) {
         if (supported_pixel_fmts[i] == ctx->fmt) {
@@ -156,14 +158,38 @@  static int config_input(AVFilterLink *inlink)
         return AVERROR(EIO);
     }
 
-    if (model_input.channels != 3) {
-        av_log(ctx, AV_LOG_ERROR, "the model requires input channels %d\n",
-                                   model_input.channels);
-        return AVERROR(EIO);
-    }
-    if (model_input.dt != DNN_FLOAT && model_input.dt != DNN_UINT8) {
-        av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32 and uint8.\n");
-        return AVERROR(EIO);
+    if (ctx->fmt == AV_PIX_FMT_RGB24 || ctx->fmt == AV_PIX_FMT_BGR24) {
+        if (model_input.channels != 3) {
+            av_log(ctx, AV_LOG_ERROR, "channel number 3 is required, but the actual channel number is %d\n",
+                                       model_input.channels);
+            return AVERROR(EIO);
+        }
+        if (model_input.dt != DNN_FLOAT && model_input.dt != DNN_UINT8) {
+            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32 and uint8.\n");
+            return AVERROR(EIO);
+        }
+    } else if (ctx->fmt == AV_PIX_FMT_GRAY8) {
+        if (model_input.channels != 1) {
+            av_log(ctx, AV_LOG_ERROR, "channel number 1 is required, but the actual channel number is %d\n",
+                                       model_input.channels);
+            return AVERROR(EIO);
+        }
+        if (model_input.dt != DNN_UINT8) {
+            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as uint8.\n");
+            return AVERROR(EIO);
+        }
+    } else if (ctx->fmt == AV_PIX_FMT_GRAYF32) {
+        if (model_input.channels != 1) {
+            av_log(ctx, AV_LOG_ERROR, "channel number 1 is required, but the actual channel number is %d\n",
+                                       model_input.channels);
+            return AVERROR(EIO);
+        }
+        if (model_input.dt != DNN_FLOAT) {
+            av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float.\n");
+            return AVERROR(EIO);
+        }
+    } else {
+        av_assert0(!"should not reach here.");
     }
 
     ctx->input.width    = inlink->w;
@@ -203,28 +229,49 @@  static int config_output(AVFilterLink *outlink)
 
 static int copy_from_frame_to_dnn(DNNData *dnn_input, const AVFrame *frame)
 {
-    // extend this function to support more formats
-    av_assert0(frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24);
-
-    if (dnn_input->dt == DNN_FLOAT) {
-        float *dnn_input_data = dnn_input->data;
-        for (int i = 0; i < frame->height; i++) {
-            for(int j = 0; j < frame->width * 3; j++) {
-                int k = i * frame->linesize[0] + j;
-                int t = i * frame->width * 3 + j;
-                dnn_input_data[t] = frame->data[0][k] / 255.0f;
+    if (frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24) {
+        if (dnn_input->dt == DNN_FLOAT) {
+            float *dnn_input_data = dnn_input->data;
+            for (int i = 0; i < frame->height; i++) {
+                for(int j = 0; j < frame->width * 3; j++) {
+                    int k = i * frame->linesize[0] + j;
+                    int t = i * frame->width * 3 + j;
+                    dnn_input_data[t] = frame->data[0][k] / 255.0f;
+                }
+            }
+        } else {
+            uint8_t *dnn_input_data = dnn_input->data;
+            av_assert0(dnn_input->dt == DNN_UINT8);
+            for (int i = 0; i < frame->height; i++) {
+                for(int j = 0; j < frame->width * 3; j++) {
+                    int k = i * frame->linesize[0] + j;
+                    int t = i * frame->width * 3 + j;
+                    dnn_input_data[t] = frame->data[0][k];
+                }
             }
         }
-    } else {
+    } else if (frame->format == AV_PIX_FMT_GRAY8) {
         uint8_t *dnn_input_data = dnn_input->data;
         av_assert0(dnn_input->dt == DNN_UINT8);
         for (int i = 0; i < frame->height; i++) {
-            for(int j = 0; j < frame->width * 3; j++) {
+            for(int j = 0; j < frame->width; j++) {
                 int k = i * frame->linesize[0] + j;
-                int t = i * frame->width * 3 + j;
+                int t = i * frame->width + j;
                 dnn_input_data[t] = frame->data[0][k];
             }
         }
+    } else if (frame->format == AV_PIX_FMT_GRAYF32) {
+        float *dnn_input_data = dnn_input->data;
+        av_assert0(dnn_input->dt == DNN_FLOAT);
+        for (int i = 0; i < frame->height; i++) {
+            for(int j = 0; j < frame->width; j++) {
+                int k = i * frame->linesize[0] + j * sizeof(float);
+                int t = i * frame->width + j;
+                dnn_input_data[t] = *(float*)(frame->data[0] + k);
+            }
+        }
+    } else {
+        av_assert0(!"should not reach here.");
     }
 
     return 0;
@@ -232,28 +279,49 @@  static int copy_from_frame_to_dnn(DNNData *dnn_input, const AVFrame *frame)
 
 static int copy_from_dnn_to_frame(AVFrame *frame, const DNNData *dnn_output)
 {
-    // extend this function to support more formats
-    av_assert0(frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24);
-
-    if (dnn_output->dt == DNN_FLOAT) {
-        float *dnn_output_data = dnn_output->data;
-        for (int i = 0; i < frame->height; i++) {
-            for(int j = 0; j < frame->width * 3; j++) {
-                int k = i * frame->linesize[0] + j;
-                int t = i * frame->width * 3 + j;
-                frame->data[0][k] = av_clip_uintp2((int)(dnn_output_data[t] * 255.0f), 8);
+    if (frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24) {
+        if (dnn_output->dt == DNN_FLOAT) {
+            float *dnn_output_data = dnn_output->data;
+            for (int i = 0; i < frame->height; i++) {
+                for(int j = 0; j < frame->width * 3; j++) {
+                    int k = i * frame->linesize[0] + j;
+                    int t = i * frame->width * 3 + j;
+                    frame->data[0][k] = av_clip_uintp2((int)(dnn_output_data[t] * 255.0f), 8);
+                }
+            }
+        } else {
+            uint8_t *dnn_output_data = dnn_output->data;
+            av_assert0(dnn_output->dt == DNN_UINT8);
+            for (int i = 0; i < frame->height; i++) {
+                for(int j = 0; j < frame->width * 3; j++) {
+                    int k = i * frame->linesize[0] + j;
+                    int t = i * frame->width * 3 + j;
+                    frame->data[0][k] = dnn_output_data[t];
+                }
             }
         }
-    } else {
+    } else if (frame->format == AV_PIX_FMT_GRAY8) {
         uint8_t *dnn_output_data = dnn_output->data;
         av_assert0(dnn_output->dt == DNN_UINT8);
         for (int i = 0; i < frame->height; i++) {
-            for(int j = 0; j < frame->width * 3; j++) {
+            for(int j = 0; j < frame->width; j++) {
                 int k = i * frame->linesize[0] + j;
-                int t = i * frame->width * 3 + j;
+                int t = i * frame->width + j;
                 frame->data[0][k] = dnn_output_data[t];
             }
         }
+    } else if (frame->format == AV_PIX_FMT_GRAYF32) {
+        float *dnn_output_data = dnn_output->data;
+        av_assert0(dnn_output->dt == DNN_FLOAT);
+        for (int i = 0; i < frame->height; i++) {
+            for(int j = 0; j < frame->width; j++) {
+                int k = i * frame->linesize[0] + j * sizeof(float);
+                int t = i * frame->width + j;
+                *(float*)(frame->data[0] + k) = dnn_output_data[t];
+            }
+        }
+    } else {
+        av_assert0(!"should not reach here.");
     }
 
     return 0;
@@ -275,7 +343,14 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_frame_free(&in);
         return AVERROR(EIO);
     }
-    av_assert0(ctx->output.channels == 3);
+
+    if (ctx->fmt == AV_PIX_FMT_RGB24 || ctx->fmt == AV_PIX_FMT_BGR24) {
+        av_assert0(ctx->output.channels == 3);
+    } else if (ctx->fmt == AV_PIX_FMT_GRAY8 || ctx->fmt == AV_PIX_FMT_GRAYF32) {
+        av_assert0(ctx->output.channels == 1);
+    } else {
+        av_assert0(!"should not reach here");
+    }
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {