[FFmpeg-devel,6/7] libavfilter/vf_sr.c: Removes uint8 -> float and float -> uint8 conversions.

Submitted by Sergey Lavrushkin on Aug. 2, 2018, 6:52 p.m.

Details

Message ID 20180802185248.18168-7-dualfal@gmail.com
State Accepted
Commit 95cb2127adcfc1e8a2c77470523ec72beae5c905
Headers show

Commit Message

Sergey Lavrushkin Aug. 2, 2018, 6:52 p.m.
This patch removes the conversions declared inside the sr filter and uses libswscale inside
the filter to perform them for only the Y channel of the input. The sr filter still has uint
formats as input, as it does not use the chroma channels in its models; these channels are
upscaled using libswscale, and float formats for input would cause unnecessary conversions
during scaling for these channels.

---
 libavfilter/vf_sr.c | 134 +++++++++++++++++++---------------------------------
 1 file changed, 48 insertions(+), 86 deletions(-)

Comments

Rostislav Pehlivanov Aug. 14, 2018, 6:45 p.m.
On Thu, 2 Aug 2018 at 20:00, Sergey Lavrushkin <dualfal@gmail.com> wrote:

> This patch removes conversions, declared inside the sr filter, and uses
> libswscale inside
> the filter to perform them for only Y channel of input. The sr filter
> still has uint
> formats as input, as it does not use chroma channels in models and these
> channels are
> upscaled using libswscale, float formats for input would cause unnecessary
> conversions
> during scaling for these channels.
>
> ---
>  libavfilter/vf_sr.c | 134
> +++++++++++++++++++---------------------------------
>  1 file changed, 48 insertions(+), 86 deletions(-)
>
> diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
> index 944a0e28e7..5ad1baa4c0 100644
> --- a/libavfilter/vf_sr.c
> +++ b/libavfilter/vf_sr.c
> @@ -45,8 +45,8 @@ typedef struct SRContext {
>      DNNModel *model;
>      DNNData input, output;
>      int scale_factor;
> -    struct SwsContext *sws_context;
> -    int sws_slice_h;
> +    struct SwsContext *sws_contexts[3];
> +    int sws_slice_h, sws_input_linesize, sws_output_linesize;
>  } SRContext;
>
>  #define OFFSET(x) offsetof(SRContext, x)
> @@ -95,6 +95,10 @@ static av_cold int init(AVFilterContext *context)
>          return AVERROR(EIO);
>      }
>
> +    sr_context->sws_contexts[0] = NULL;
> +    sr_context->sws_contexts[1] = NULL;
> +    sr_context->sws_contexts[2] = NULL;
> +
>      return 0;
>  }
>
> @@ -110,6 +114,7 @@ static int query_formats(AVFilterContext *context)
>          av_log(context, AV_LOG_ERROR, "could not create formats list\n");
>          return AVERROR(ENOMEM);
>      }
> +
>      return ff_set_common_formats(context, formats_list);
>  }
>
> @@ -140,21 +145,31 @@ static int config_props(AVFilterLink *inlink)
>      else{
>          outlink->h = sr_context->output.height;
>          outlink->w = sr_context->output.width;
> +        sr_context->sws_contexts[1] =
> sws_getContext(sr_context->input.width, sr_context->input.height,
> AV_PIX_FMT_GRAY8,
> +
>  sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAYF32,
> +                                                     0, NULL, NULL, NULL);
> +        sr_context->sws_input_linesize = sr_context->input.width << 2;
> +        sr_context->sws_contexts[2] =
> sws_getContext(sr_context->output.width, sr_context->output.height,
> AV_PIX_FMT_GRAYF32,
> +
>  sr_context->output.width, sr_context->output.height, AV_PIX_FMT_GRAY8,
> +                                                     0, NULL, NULL, NULL);
> +        sr_context->sws_output_linesize = sr_context->output.width << 2;
> +        if (!sr_context->sws_contexts[1] || !sr_context->sws_contexts[2]){
> +            av_log(context, AV_LOG_ERROR, "could not create SwsContext
> for conversions\n");
> +            return AVERROR(ENOMEM);
> +        }
>          switch (sr_context->model_type){
>          case SRCNN:
> -            sr_context->sws_context = sws_getContext(inlink->w,
> inlink->h, inlink->format,
> -                                                     outlink->w,
> outlink->h, outlink->format, SWS_BICUBIC, NULL, NULL, NULL);
> -            if (!sr_context->sws_context){
> -                av_log(context, AV_LOG_ERROR, "could not create
> SwsContext\n");
> +            sr_context->sws_contexts[0] = sws_getContext(inlink->w,
> inlink->h, inlink->format,
> +                                                         outlink->w,
> outlink->h, outlink->format,
> +                                                         SWS_BICUBIC,
> NULL, NULL, NULL);
> +            if (!sr_context->sws_contexts[0]){
> +                av_log(context, AV_LOG_ERROR, "could not create
> SwsContext for scaling\n");
>                  return AVERROR(ENOMEM);
>              }
>              sr_context->sws_slice_h = inlink->h;
>              break;
>          case ESPCN:
> -            if (inlink->format == AV_PIX_FMT_GRAY8){
> -                sr_context->sws_context = NULL;
> -            }
> -            else{
> +            if (inlink->format != AV_PIX_FMT_GRAY8){
>                  sws_src_h = sr_context->input.height;
>                  sws_src_w = sr_context->input.width;
>                  sws_dst_h = sr_context->output.height;
> @@ -184,13 +199,14 @@ static int config_props(AVFilterLink *inlink)
>                      sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 2);
>                      break;
>                  default:
> -                    av_log(context, AV_LOG_ERROR, "could not create
> SwsContext for input pixel format");
> +                    av_log(context, AV_LOG_ERROR, "could not create
> SwsContext for scaling for given input pixel format");
>                      return AVERROR(EIO);
>                  }
> -                sr_context->sws_context = sws_getContext(sws_src_w,
> sws_src_h, AV_PIX_FMT_GRAY8,
> -                                                         sws_dst_w,
> sws_dst_h, AV_PIX_FMT_GRAY8, SWS_BICUBIC, NULL, NULL, NULL);
> -                if (!sr_context->sws_context){
> -                    av_log(context, AV_LOG_ERROR, "could not create
> SwsContext\n");
> +                sr_context->sws_contexts[0] = sws_getContext(sws_src_w,
> sws_src_h, AV_PIX_FMT_GRAY8,
> +                                                             sws_dst_w,
> sws_dst_h, AV_PIX_FMT_GRAY8,
> +                                                             SWS_BICUBIC,
> NULL, NULL, NULL);
> +                if (!sr_context->sws_contexts[0]){
> +                    av_log(context, AV_LOG_ERROR, "could not create
> SwsContext for scaling\n");
>                      return AVERROR(ENOMEM);
>                  }
>                  sr_context->sws_slice_h = sws_src_h;
> @@ -201,61 +217,12 @@ static int config_props(AVFilterLink *inlink)
>      }
>  }
>
> -typedef struct ThreadData{
> -    uint8_t *data;
> -    int data_linesize, height, width;
> -} ThreadData;
> -
> -static int uint8_to_float(AVFilterContext *context, void *arg, int jobnr,
> int nb_jobs)
> -{
> -    SRContext *sr_context = context->priv;
> -    const ThreadData *td = arg;
> -    const int slice_start = (td->height *  jobnr     ) / nb_jobs;
> -    const int slice_end   = (td->height * (jobnr + 1)) / nb_jobs;
> -    const uint8_t *src = td->data + slice_start * td->data_linesize;
> -    float *dst = sr_context->input.data + slice_start * td->width;
> -    int y, x;
> -
> -    for (y = slice_start; y < slice_end; ++y){
> -        for (x = 0; x < td->width; ++x){
> -            dst[x] = (float)src[x] / 255.0f;
> -        }
> -        src += td->data_linesize;
> -        dst += td->width;
> -    }
> -
> -    return 0;
> -}
> -
> -static int float_to_uint8(AVFilterContext *context, void *arg, int jobnr,
> int nb_jobs)
> -{
> -    SRContext *sr_context = context->priv;
> -    const ThreadData *td = arg;
> -    const int slice_start = (td->height *  jobnr     ) / nb_jobs;
> -    const int slice_end   = (td->height * (jobnr + 1)) / nb_jobs;
> -    const float *src = sr_context->output.data + slice_start * td->width;
> -    uint8_t *dst = td->data + slice_start * td->data_linesize;
> -    int y, x;
> -
> -    for (y = slice_start; y < slice_end; ++y){
> -        for (x = 0; x < td->width; ++x){
> -            dst[x] = (uint8_t)(255.0f * FFMIN(src[x], 1.0f));
> -        }
> -        src += td->width;
> -        dst += td->data_linesize;
> -    }
> -
> -    return 0;
> -}
> -
>  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>  {
>      AVFilterContext *context = inlink->dst;
>      SRContext *sr_context = context->priv;
>      AVFilterLink *outlink = context->outputs[0];
>      AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> -    ThreadData td;
> -    int nb_threads;
>      DNNReturnType dnn_result;
>
>      if (!out){
> @@ -268,28 +235,23 @@ static int filter_frame(AVFilterLink *inlink,
> AVFrame *in)
>      out->width = sr_context->output.width;
>      switch (sr_context->model_type){
>      case SRCNN:
> -        sws_scale(sr_context->sws_context, (const uint8_t **)in->data,
> in->linesize,
> +        sws_scale(sr_context->sws_contexts[0], (const uint8_t
> **)in->data, in->linesize,
>                    0, sr_context->sws_slice_h, out->data, out->linesize);
> -        td.data = out->data[0];
> -        td.data_linesize = out->linesize[0];
> -        td.height = out->height;
> -        td.width = out->width;
> +
> +        sws_scale(sr_context->sws_contexts[1], (const uint8_t
> **)out->data, out->linesize,
> +                  0, out->height, (uint8_t *
> const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
>          break;
>      case ESPCN:
> -        if (sr_context->sws_context){
> -            sws_scale(sr_context->sws_context, (const uint8_t
> **)(in->data + 1), in->linesize + 1,
> +        if (sr_context->sws_contexts[0]){
> +            sws_scale(sr_context->sws_contexts[0], (const uint8_t
> **)(in->data + 1), in->linesize + 1,
>                        0, sr_context->sws_slice_h, out->data + 1,
> out->linesize + 1);
> -            sws_scale(sr_context->sws_context, (const uint8_t
> **)(in->data + 2), in->linesize + 2,
> +            sws_scale(sr_context->sws_contexts[0], (const uint8_t
> **)(in->data + 2), in->linesize + 2,
>                        0, sr_context->sws_slice_h, out->data + 2,
> out->linesize + 2);
>          }
> -        td.data = in->data[0];
> -        td.data_linesize = in->linesize[0];
> -        td.height = in->height;
> -        td.width = in->width;
> -    }
>
> -    nb_threads = ff_filter_get_nb_threads(context);
> -    context->internal->execute(context, uint8_to_float, &td, NULL,
> FFMIN(td.height, nb_threads));
> +        sws_scale(sr_context->sws_contexts[1], (const uint8_t
> **)in->data, in->linesize,
> +                  0, in->height, (uint8_t *
> const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
> +    }
>      av_frame_free(&in);
>
>      dnn_result =
> (sr_context->dnn_module->execute_model)(sr_context->model);
> @@ -298,17 +260,15 @@ static int filter_frame(AVFilterLink *inlink,
> AVFrame *in)
>          return AVERROR(EIO);
>      }
>
> -    td.data = out->data[0];
> -    td.data_linesize = out->linesize[0];
> -    td.height = out->height;
> -    td.width = out->width;
> -    context->internal->execute(context, float_to_uint8, &td, NULL,
> FFMIN(td.height, nb_threads));
> +    sws_scale(sr_context->sws_contexts[2], (const uint8_t
> **)(&sr_context->output.data), &sr_context->sws_output_linesize,
> +              0, out->height, (uint8_t * const*)out->data, out->linesize);
>
>      return ff_filter_frame(outlink, out);
>  }
>
>  static av_cold void uninit(AVFilterContext *context)
>  {
> +    int i;
>      SRContext *sr_context = context->priv;
>
>      if (sr_context->dnn_module){
> @@ -316,8 +276,10 @@ static av_cold void uninit(AVFilterContext *context)
>          av_freep(&sr_context->dnn_module);
>      }
>
> -    if (sr_context->sws_context){
> -        sws_freeContext(sr_context->sws_context);
> +    for (i = 0; i < 3; ++i){
> +        if (sr_context->sws_contexts[i]){
> +            sws_freeContext(sr_context->sws_contexts[i]);
> +        }
>      }
>  }
>
> --
> 2.14.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


You are planning to remove *all* conversion still, right? Its still
unacceptable that there *are* conversions.
Pedro Arthur Aug. 14, 2018, 7:07 p.m.
2018-08-14 15:45 GMT-03:00 Rostislav Pehlivanov <atomnuker@gmail.com>:
> On Thu, 2 Aug 2018 at 20:00, Sergey Lavrushkin <dualfal@gmail.com> wrote:
>
>> This patch removes conversions, declared inside the sr filter, and uses
>> libswscale inside
>> the filter to perform them for only Y channel of input. The sr filter
>> still has uint
>> formats as input, as it does not use chroma channels in models and these
>> channels are
>> upscaled using libswscale, float formats for input would cause unnecessary
>> conversions
>> during scaling for these channels.
>>
>> ---
>>  libavfilter/vf_sr.c | 134
>> +++++++++++++++++++---------------------------------
>>  1 file changed, 48 insertions(+), 86 deletions(-)
>>
>> diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
>> index 944a0e28e7..5ad1baa4c0 100644
>> --- a/libavfilter/vf_sr.c
>> +++ b/libavfilter/vf_sr.c
>> @@ -45,8 +45,8 @@ typedef struct SRContext {
>>      DNNModel *model;
>>      DNNData input, output;
>>      int scale_factor;
>> -    struct SwsContext *sws_context;
>> -    int sws_slice_h;
>> +    struct SwsContext *sws_contexts[3];
>> +    int sws_slice_h, sws_input_linesize, sws_output_linesize;
>>  } SRContext;
>>
>>  #define OFFSET(x) offsetof(SRContext, x)
>> @@ -95,6 +95,10 @@ static av_cold int init(AVFilterContext *context)
>>          return AVERROR(EIO);
>>      }
>>
>> +    sr_context->sws_contexts[0] = NULL;
>> +    sr_context->sws_contexts[1] = NULL;
>> +    sr_context->sws_contexts[2] = NULL;
>> +
>>      return 0;
>>  }
>>
>> @@ -110,6 +114,7 @@ static int query_formats(AVFilterContext *context)
>>          av_log(context, AV_LOG_ERROR, "could not create formats list\n");
>>          return AVERROR(ENOMEM);
>>      }
>> +
>>      return ff_set_common_formats(context, formats_list);
>>  }
>>
>> @@ -140,21 +145,31 @@ static int config_props(AVFilterLink *inlink)
>>      else{
>>          outlink->h = sr_context->output.height;
>>          outlink->w = sr_context->output.width;
>> +        sr_context->sws_contexts[1] =
>> sws_getContext(sr_context->input.width, sr_context->input.height,
>> AV_PIX_FMT_GRAY8,
>> +
>>  sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAYF32,
>> +                                                     0, NULL, NULL, NULL);
>> +        sr_context->sws_input_linesize = sr_context->input.width << 2;
>> +        sr_context->sws_contexts[2] =
>> sws_getContext(sr_context->output.width, sr_context->output.height,
>> AV_PIX_FMT_GRAYF32,
>> +
>>  sr_context->output.width, sr_context->output.height, AV_PIX_FMT_GRAY8,
>> +                                                     0, NULL, NULL, NULL);
>> +        sr_context->sws_output_linesize = sr_context->output.width << 2;
>> +        if (!sr_context->sws_contexts[1] || !sr_context->sws_contexts[2]){
>> +            av_log(context, AV_LOG_ERROR, "could not create SwsContext
>> for conversions\n");
>> +            return AVERROR(ENOMEM);
>> +        }
>>          switch (sr_context->model_type){
>>          case SRCNN:
>> -            sr_context->sws_context = sws_getContext(inlink->w,
>> inlink->h, inlink->format,
>> -                                                     outlink->w,
>> outlink->h, outlink->format, SWS_BICUBIC, NULL, NULL, NULL);
>> -            if (!sr_context->sws_context){
>> -                av_log(context, AV_LOG_ERROR, "could not create
>> SwsContext\n");
>> +            sr_context->sws_contexts[0] = sws_getContext(inlink->w,
>> inlink->h, inlink->format,
>> +                                                         outlink->w,
>> outlink->h, outlink->format,
>> +                                                         SWS_BICUBIC,
>> NULL, NULL, NULL);
>> +            if (!sr_context->sws_contexts[0]){
>> +                av_log(context, AV_LOG_ERROR, "could not create
>> SwsContext for scaling\n");
>>                  return AVERROR(ENOMEM);
>>              }
>>              sr_context->sws_slice_h = inlink->h;
>>              break;
>>          case ESPCN:
>> -            if (inlink->format == AV_PIX_FMT_GRAY8){
>> -                sr_context->sws_context = NULL;
>> -            }
>> -            else{
>> +            if (inlink->format != AV_PIX_FMT_GRAY8){
>>                  sws_src_h = sr_context->input.height;
>>                  sws_src_w = sr_context->input.width;
>>                  sws_dst_h = sr_context->output.height;
>> @@ -184,13 +199,14 @@ static int config_props(AVFilterLink *inlink)
>>                      sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 2);
>>                      break;
>>                  default:
>> -                    av_log(context, AV_LOG_ERROR, "could not create
>> SwsContext for input pixel format");
>> +                    av_log(context, AV_LOG_ERROR, "could not create
>> SwsContext for scaling for given input pixel format");
>>                      return AVERROR(EIO);
>>                  }
>> -                sr_context->sws_context = sws_getContext(sws_src_w,
>> sws_src_h, AV_PIX_FMT_GRAY8,
>> -                                                         sws_dst_w,
>> sws_dst_h, AV_PIX_FMT_GRAY8, SWS_BICUBIC, NULL, NULL, NULL);
>> -                if (!sr_context->sws_context){
>> -                    av_log(context, AV_LOG_ERROR, "could not create
>> SwsContext\n");
>> +                sr_context->sws_contexts[0] = sws_getContext(sws_src_w,
>> sws_src_h, AV_PIX_FMT_GRAY8,
>> +                                                             sws_dst_w,
>> sws_dst_h, AV_PIX_FMT_GRAY8,
>> +                                                             SWS_BICUBIC,
>> NULL, NULL, NULL);
>> +                if (!sr_context->sws_contexts[0]){
>> +                    av_log(context, AV_LOG_ERROR, "could not create
>> SwsContext for scaling\n");
>>                      return AVERROR(ENOMEM);
>>                  }
>>                  sr_context->sws_slice_h = sws_src_h;
>> @@ -201,61 +217,12 @@ static int config_props(AVFilterLink *inlink)
>>      }
>>  }
>>
>> -typedef struct ThreadData{
>> -    uint8_t *data;
>> -    int data_linesize, height, width;
>> -} ThreadData;
>> -
>> -static int uint8_to_float(AVFilterContext *context, void *arg, int jobnr,
>> int nb_jobs)
>> -{
>> -    SRContext *sr_context = context->priv;
>> -    const ThreadData *td = arg;
>> -    const int slice_start = (td->height *  jobnr     ) / nb_jobs;
>> -    const int slice_end   = (td->height * (jobnr + 1)) / nb_jobs;
>> -    const uint8_t *src = td->data + slice_start * td->data_linesize;
>> -    float *dst = sr_context->input.data + slice_start * td->width;
>> -    int y, x;
>> -
>> -    for (y = slice_start; y < slice_end; ++y){
>> -        for (x = 0; x < td->width; ++x){
>> -            dst[x] = (float)src[x] / 255.0f;
>> -        }
>> -        src += td->data_linesize;
>> -        dst += td->width;
>> -    }
>> -
>> -    return 0;
>> -}
>> -
>> -static int float_to_uint8(AVFilterContext *context, void *arg, int jobnr,
>> int nb_jobs)
>> -{
>> -    SRContext *sr_context = context->priv;
>> -    const ThreadData *td = arg;
>> -    const int slice_start = (td->height *  jobnr     ) / nb_jobs;
>> -    const int slice_end   = (td->height * (jobnr + 1)) / nb_jobs;
>> -    const float *src = sr_context->output.data + slice_start * td->width;
>> -    uint8_t *dst = td->data + slice_start * td->data_linesize;
>> -    int y, x;
>> -
>> -    for (y = slice_start; y < slice_end; ++y){
>> -        for (x = 0; x < td->width; ++x){
>> -            dst[x] = (uint8_t)(255.0f * FFMIN(src[x], 1.0f));
>> -        }
>> -        src += td->width;
>> -        dst += td->data_linesize;
>> -    }
>> -
>> -    return 0;
>> -}
>> -
>>  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>>  {
>>      AVFilterContext *context = inlink->dst;
>>      SRContext *sr_context = context->priv;
>>      AVFilterLink *outlink = context->outputs[0];
>>      AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
>> -    ThreadData td;
>> -    int nb_threads;
>>      DNNReturnType dnn_result;
>>
>>      if (!out){
>> @@ -268,28 +235,23 @@ static int filter_frame(AVFilterLink *inlink,
>> AVFrame *in)
>>      out->width = sr_context->output.width;
>>      switch (sr_context->model_type){
>>      case SRCNN:
>> -        sws_scale(sr_context->sws_context, (const uint8_t **)in->data,
>> in->linesize,
>> +        sws_scale(sr_context->sws_contexts[0], (const uint8_t
>> **)in->data, in->linesize,
>>                    0, sr_context->sws_slice_h, out->data, out->linesize);
>> -        td.data = out->data[0];
>> -        td.data_linesize = out->linesize[0];
>> -        td.height = out->height;
>> -        td.width = out->width;
>> +
>> +        sws_scale(sr_context->sws_contexts[1], (const uint8_t
>> **)out->data, out->linesize,
>> +                  0, out->height, (uint8_t *
>> const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
>>          break;
>>      case ESPCN:
>> -        if (sr_context->sws_context){
>> -            sws_scale(sr_context->sws_context, (const uint8_t
>> **)(in->data + 1), in->linesize + 1,
>> +        if (sr_context->sws_contexts[0]){
>> +            sws_scale(sr_context->sws_contexts[0], (const uint8_t
>> **)(in->data + 1), in->linesize + 1,
>>                        0, sr_context->sws_slice_h, out->data + 1,
>> out->linesize + 1);
>> -            sws_scale(sr_context->sws_context, (const uint8_t
>> **)(in->data + 2), in->linesize + 2,
>> +            sws_scale(sr_context->sws_contexts[0], (const uint8_t
>> **)(in->data + 2), in->linesize + 2,
>>                        0, sr_context->sws_slice_h, out->data + 2,
>> out->linesize + 2);
>>          }
>> -        td.data = in->data[0];
>> -        td.data_linesize = in->linesize[0];
>> -        td.height = in->height;
>> -        td.width = in->width;
>> -    }
>>
>> -    nb_threads = ff_filter_get_nb_threads(context);
>> -    context->internal->execute(context, uint8_to_float, &td, NULL,
>> FFMIN(td.height, nb_threads));
>> +        sws_scale(sr_context->sws_contexts[1], (const uint8_t
>> **)in->data, in->linesize,
>> +                  0, in->height, (uint8_t *
>> const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
>> +    }
>>      av_frame_free(&in);
>>
>>      dnn_result =
>> (sr_context->dnn_module->execute_model)(sr_context->model);
>> @@ -298,17 +260,15 @@ static int filter_frame(AVFilterLink *inlink,
>> AVFrame *in)
>>          return AVERROR(EIO);
>>      }
>>
>> -    td.data = out->data[0];
>> -    td.data_linesize = out->linesize[0];
>> -    td.height = out->height;
>> -    td.width = out->width;
>> -    context->internal->execute(context, float_to_uint8, &td, NULL,
>> FFMIN(td.height, nb_threads));
>> +    sws_scale(sr_context->sws_contexts[2], (const uint8_t
>> **)(&sr_context->output.data), &sr_context->sws_output_linesize,
>> +              0, out->height, (uint8_t * const*)out->data, out->linesize);
>>
>>      return ff_filter_frame(outlink, out);
>>  }
>>
>>  static av_cold void uninit(AVFilterContext *context)
>>  {
>> +    int i;
>>      SRContext *sr_context = context->priv;
>>
>>      if (sr_context->dnn_module){
>> @@ -316,8 +276,10 @@ static av_cold void uninit(AVFilterContext *context)
>>          av_freep(&sr_context->dnn_module);
>>      }
>>
>> -    if (sr_context->sws_context){
>> -        sws_freeContext(sr_context->sws_context);
>> +    for (i = 0; i < 3; ++i){
>> +        if (sr_context->sws_contexts[i]){
>> +            sws_freeContext(sr_context->sws_contexts[i]);
>> +        }
>>      }
>>  }
>>
>> --
>> 2.14.1
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>
> You are planning to remove *all* conversion still, right? Its still
> unacceptable that there *are* conversions.

They are here because it is the most efficient way to do it. The
filter works only on the luminance channel, therefore we only apply
the conversion to the Y channel, and a bicubic upscale to the chrominance.
I can't see how one can achieve the same result without doing useless
computations, other than in this way.

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Marton Balint Aug. 14, 2018, 10:49 p.m.
On Tue, 14 Aug 2018, Pedro Arthur wrote:

> 2018-08-14 15:45 GMT-03:00 Rostislav Pehlivanov <atomnuker@gmail.com>:
>> On Thu, 2 Aug 2018 at 20:00, Sergey Lavrushkin <dualfal@gmail.com> wrote:
>>
>>> This patch removes conversions, declared inside the sr filter, and uses
>>> libswscale inside
>>> the filter to perform them for only Y channel of input. The sr filter
>>> still has uint
>>> formats as input, as it does not use chroma channels in models and these
>>> channels are
>>> upscaled using libswscale, float formats for input would cause unnecessary
>>> conversions
>>> during scaling for these channels.
>>>

[...]

>> You are planning to remove *all* conversion still, right? Its still
>> unacceptable that there *are* conversions.
>
> They are here because it is the most efficient way to do it. The
> filter works only on luminance channel therefore we only apply
> conversion to Y channel, and bicubic upscale to chrominance.
> I can't see how one can achieve the same result, without doing useless
> computations, if not in this way.

Is there a reason why only the luminance channel is scaled this way? Can't 
you also train scaling chroma planes the same way? This way you could 
really eliminate the internal calls to swscale. If the user prefers to 
scale only one channel, he can always split the planes and scale them 
separately (using different filters) and then merge them.

Thanks,
Marton
Pedro Arthur Aug. 15, 2018, 1:37 p.m.
2018-08-14 19:49 GMT-03:00 Marton Balint <cus@passwd.hu>:
>
> On Tue, 14 Aug 2018, Pedro Arthur wrote:
>
>> 2018-08-14 15:45 GMT-03:00 Rostislav Pehlivanov <atomnuker@gmail.com>:
>>>
>>> On Thu, 2 Aug 2018 at 20:00, Sergey Lavrushkin <dualfal@gmail.com> wrote:
>>>
>>>> This patch removes conversions, declared inside the sr filter, and uses
>>>> libswscale inside
>>>> the filter to perform them for only Y channel of input. The sr filter
>>>> still has uint
>>>> formats as input, as it does not use chroma channels in models and these
>>>> channels are
>>>> upscaled using libswscale, float formats for input would cause
>>>> unnecessary
>>>> conversions
>>>> during scaling for these channels.
>>>>
>
> [...]
>
>>> You are planning to remove *all* conversion still, right? Its still
>>> unacceptable that there *are* conversions.
>>
>>
>> They are here because it is the most efficient way to do it. The
>> filter works only on luminance channel therefore we only apply
>> conversion to Y channel, and bicubic upscale to chrominance.
>> I can't see how one can achieve the same result, without doing useless
>> computations, if not in this way.
>
>
> Is there a reason why only the luminance channel is scaled this way? Can't
> you also train scaling chroma planes the same way? This way you could really
> eliminate the internal calls to swscale. If the user prefers to scale only
> one channel, he can always split the planes and scale them separately (using
> different filters) and then merge them.
The sr cnn tries to restore high frequencies, therefore it is applied only
to the luminance channel, because applying it to chrominance does not
improve quality much and would be much slower.
The most efficient way to do it is to convert only the Y channel to float,
apply the cnn to it and convert it back to int, and just upscale the
UV planes with the swscale bicubic filter.
>
> Thanks,
> Marton
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Sergey Lavrushkin Aug. 15, 2018, 3:18 p.m.
2018-08-15 1:49 GMT+03:00 Marton Balint <cus@passwd.hu>:

>
> On Tue, 14 Aug 2018, Pedro Arthur wrote:
>
> 2018-08-14 15:45 GMT-03:00 Rostislav Pehlivanov <atomnuker@gmail.com>:
>>
>>> On Thu, 2 Aug 2018 at 20:00, Sergey Lavrushkin <dualfal@gmail.com>
>>> wrote:
>>>
>>> This patch removes conversions, declared inside the sr filter, and uses
>>>> libswscale inside
>>>> the filter to perform them for only Y channel of input. The sr filter
>>>> still has uint
>>>> formats as input, as it does not use chroma channels in models and these
>>>> channels are
>>>> upscaled using libswscale, float formats for input would cause
>>>> unnecessary
>>>> conversions
>>>> during scaling for these channels.
>>>>
>>>>
> [...]
>
> You are planning to remove *all* conversion still, right? Its still
>>> unacceptable that there *are* conversions.
>>>
>>
>> They are here because it is the most efficient way to do it. The
>> filter works only on luminance channel therefore we only apply
>> conversion to Y channel, and bicubic upscale to chrominance.
>> I can't see how one can achieve the same result, without doing useless
>> computations, if not in this way.
>>
>
> Is there a reason why only the luminance channel is scaled this way? Can't
> you also train scaling chroma planes the same way? This way you could
> really eliminate the internal calls to swscale. If the user prefers to
> scale only one channel, he can always split the planes and scale them
> separately (using different filters) and then merge them.
>

If it is possible, I can then change sr filter to work only for Y channel.
Can you give me some examples of how to split the planes, filter them
separately
and merge them back?
Paul B Mahol Aug. 15, 2018, 4:01 p.m.
On 8/15/18, Sergey Lavrushkin <dualfal@gmail.com> wrote:
> 2018-08-15 1:49 GMT+03:00 Marton Balint <cus@passwd.hu>:
>
>>
>> On Tue, 14 Aug 2018, Pedro Arthur wrote:
>>
>> 2018-08-14 15:45 GMT-03:00 Rostislav Pehlivanov <atomnuker@gmail.com>:
>>>
>>>> On Thu, 2 Aug 2018 at 20:00, Sergey Lavrushkin <dualfal@gmail.com>
>>>> wrote:
>>>>
>>>> This patch removes conversions, declared inside the sr filter, and uses
>>>>> libswscale inside
>>>>> the filter to perform them for only Y channel of input. The sr filter
>>>>> still has uint
>>>>> formats as input, as it does not use chroma channels in models and
>>>>> these
>>>>> channels are
>>>>> upscaled using libswscale, float formats for input would cause
>>>>> unnecessary
>>>>> conversions
>>>>> during scaling for these channels.
>>>>>
>>>>>
>> [...]
>>
>> You are planning to remove *all* conversion still, right? Its still
>>>> unacceptable that there *are* conversions.
>>>>
>>>
>>> They are here because it is the most efficient way to do it. The
>>> filter works only on luminance channel therefore we only apply
>>> conversion to Y channel, and bicubic upscale to chrominance.
>>> I can't see how one can achieve the same result, without doing useless
>>> computations, if not in this way.
>>>
>>
>> Is there a reason why only the luminance channel is scaled this way? Can't
>> you also train scaling chroma planes the same way? This way you could
>> really eliminate the internal calls to swscale. If the user prefers to
>> scale only one channel, he can always split the planes and scale them
>> separately (using different filters) and then merge them.
>>
>
> If it is possible, I can then change sr filter to work only for Y channel.
> Can you give me some examples of how to split the planes, filter them
> separately
> and merge them back?

see extractplanes and mergeplanes filters documentation.

Patch hide | download patch | download mbox

diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
index 944a0e28e7..5ad1baa4c0 100644
--- a/libavfilter/vf_sr.c
+++ b/libavfilter/vf_sr.c
@@ -45,8 +45,8 @@  typedef struct SRContext {
     DNNModel *model;
     DNNData input, output;
     int scale_factor;
-    struct SwsContext *sws_context;
-    int sws_slice_h;
+    struct SwsContext *sws_contexts[3];
+    int sws_slice_h, sws_input_linesize, sws_output_linesize;
 } SRContext;
 
 #define OFFSET(x) offsetof(SRContext, x)
@@ -95,6 +95,10 @@  static av_cold int init(AVFilterContext *context)
         return AVERROR(EIO);
     }
 
+    sr_context->sws_contexts[0] = NULL;
+    sr_context->sws_contexts[1] = NULL;
+    sr_context->sws_contexts[2] = NULL;
+
     return 0;
 }
 
@@ -110,6 +114,7 @@  static int query_formats(AVFilterContext *context)
         av_log(context, AV_LOG_ERROR, "could not create formats list\n");
         return AVERROR(ENOMEM);
     }
+
     return ff_set_common_formats(context, formats_list);
 }
 
@@ -140,21 +145,31 @@  static int config_props(AVFilterLink *inlink)
     else{
         outlink->h = sr_context->output.height;
         outlink->w = sr_context->output.width;
+        sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8,
+                                                     sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAYF32,
+                                                     0, NULL, NULL, NULL);
+        sr_context->sws_input_linesize = sr_context->input.width << 2;
+        sr_context->sws_contexts[2] = sws_getContext(sr_context->output.width, sr_context->output.height, AV_PIX_FMT_GRAYF32,
+                                                     sr_context->output.width, sr_context->output.height, AV_PIX_FMT_GRAY8,
+                                                     0, NULL, NULL, NULL);
+        sr_context->sws_output_linesize = sr_context->output.width << 2;
+        if (!sr_context->sws_contexts[1] || !sr_context->sws_contexts[2]){
+            av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n");
+            return AVERROR(ENOMEM);
+        }
         switch (sr_context->model_type){
         case SRCNN:
-            sr_context->sws_context = sws_getContext(inlink->w, inlink->h, inlink->format,
-                                                     outlink->w, outlink->h, outlink->format, SWS_BICUBIC, NULL, NULL, NULL);
-            if (!sr_context->sws_context){
-                av_log(context, AV_LOG_ERROR, "could not create SwsContext\n");
+            sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format,
+                                                         outlink->w, outlink->h, outlink->format,
+                                                         SWS_BICUBIC, NULL, NULL, NULL);
+            if (!sr_context->sws_contexts[0]){
+                av_log(context, AV_LOG_ERROR, "could not create SwsContext for scaling\n");
                 return AVERROR(ENOMEM);
             }
             sr_context->sws_slice_h = inlink->h;
             break;
         case ESPCN:
-            if (inlink->format == AV_PIX_FMT_GRAY8){
-                sr_context->sws_context = NULL;
-            }
-            else{
+            if (inlink->format != AV_PIX_FMT_GRAY8){
                 sws_src_h = sr_context->input.height;
                 sws_src_w = sr_context->input.width;
                 sws_dst_h = sr_context->output.height;
@@ -184,13 +199,14 @@  static int config_props(AVFilterLink *inlink)
                     sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 2);
                     break;
                 default:
-                    av_log(context, AV_LOG_ERROR, "could not create SwsContext for input pixel format");
+                    av_log(context, AV_LOG_ERROR, "could not create SwsContext for scaling for given input pixel format");
                     return AVERROR(EIO);
                 }
-                sr_context->sws_context = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
-                                                         sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8, SWS_BICUBIC, NULL, NULL, NULL);
-                if (!sr_context->sws_context){
-                    av_log(context, AV_LOG_ERROR, "could not create SwsContext\n");
+                sr_context->sws_contexts[0] = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
+                                                             sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
+                                                             SWS_BICUBIC, NULL, NULL, NULL);
+                if (!sr_context->sws_contexts[0]){
+                    av_log(context, AV_LOG_ERROR, "could not create SwsContext for scaling\n");
                     return AVERROR(ENOMEM);
                 }
                 sr_context->sws_slice_h = sws_src_h;
@@ -201,61 +217,12 @@  static int config_props(AVFilterLink *inlink)
     }
 }
 
-typedef struct ThreadData{
-    uint8_t *data;
-    int data_linesize, height, width;
-} ThreadData;
-
-static int uint8_to_float(AVFilterContext *context, void *arg, int jobnr, int nb_jobs)
-{
-    SRContext *sr_context = context->priv;
-    const ThreadData *td = arg;
-    const int slice_start = (td->height *  jobnr     ) / nb_jobs;
-    const int slice_end   = (td->height * (jobnr + 1)) / nb_jobs;
-    const uint8_t *src = td->data + slice_start * td->data_linesize;
-    float *dst = sr_context->input.data + slice_start * td->width;
-    int y, x;
-
-    for (y = slice_start; y < slice_end; ++y){
-        for (x = 0; x < td->width; ++x){
-            dst[x] = (float)src[x] / 255.0f;
-        }
-        src += td->data_linesize;
-        dst += td->width;
-    }
-
-    return 0;
-}
-
-static int float_to_uint8(AVFilterContext *context, void *arg, int jobnr, int nb_jobs)
-{
-    SRContext *sr_context = context->priv;
-    const ThreadData *td = arg;
-    const int slice_start = (td->height *  jobnr     ) / nb_jobs;
-    const int slice_end   = (td->height * (jobnr + 1)) / nb_jobs;
-    const float *src = sr_context->output.data + slice_start * td->width;
-    uint8_t *dst = td->data + slice_start * td->data_linesize;
-    int y, x;
-
-    for (y = slice_start; y < slice_end; ++y){
-        for (x = 0; x < td->width; ++x){
-            dst[x] = (uint8_t)(255.0f * FFMIN(src[x], 1.0f));
-        }
-        src += td->width;
-        dst += td->data_linesize;
-    }
-
-    return 0;
-}
-
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *context = inlink->dst;
     SRContext *sr_context = context->priv;
     AVFilterLink *outlink = context->outputs[0];
     AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-    ThreadData td;
-    int nb_threads;
     DNNReturnType dnn_result;
 
     if (!out){
@@ -268,28 +235,23 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     out->width = sr_context->output.width;
     switch (sr_context->model_type){
     case SRCNN:
-        sws_scale(sr_context->sws_context, (const uint8_t **)in->data, in->linesize,
+        sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize,
                   0, sr_context->sws_slice_h, out->data, out->linesize);
-        td.data = out->data[0];
-        td.data_linesize = out->linesize[0];
-        td.height = out->height;
-        td.width = out->width;
+
+        sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize,
+                  0, out->height, (uint8_t * const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
         break;
     case ESPCN:
-        if (sr_context->sws_context){
-            sws_scale(sr_context->sws_context, (const uint8_t **)(in->data + 1), in->linesize + 1,
+        if (sr_context->sws_contexts[0]){
+            sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1,
                       0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1);
-            sws_scale(sr_context->sws_context, (const uint8_t **)(in->data + 2), in->linesize + 2,
+            sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 2), in->linesize + 2,
                       0, sr_context->sws_slice_h, out->data + 2, out->linesize + 2);
         }
-        td.data = in->data[0];
-        td.data_linesize = in->linesize[0];
-        td.height = in->height;
-        td.width = in->width;
-    }
 
-    nb_threads = ff_filter_get_nb_threads(context);
-    context->internal->execute(context, uint8_to_float, &td, NULL, FFMIN(td.height, nb_threads));
+        sws_scale(sr_context->sws_contexts[1], (const uint8_t **)in->data, in->linesize,
+                  0, in->height, (uint8_t * const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
+    }
     av_frame_free(&in);
 
     dnn_result = (sr_context->dnn_module->execute_model)(sr_context->model);
@@ -298,17 +260,15 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         return AVERROR(EIO);
     }
 
-    td.data = out->data[0];
-    td.data_linesize = out->linesize[0];
-    td.height = out->height;
-    td.width = out->width;
-    context->internal->execute(context, float_to_uint8, &td, NULL, FFMIN(td.height, nb_threads));
+    sws_scale(sr_context->sws_contexts[2], (const uint8_t **)(&sr_context->output.data), &sr_context->sws_output_linesize,
+              0, out->height, (uint8_t * const*)out->data, out->linesize);
 
     return ff_filter_frame(outlink, out);
 }
 
 static av_cold void uninit(AVFilterContext *context)
 {
+    int i;
     SRContext *sr_context = context->priv;
 
     if (sr_context->dnn_module){
@@ -316,8 +276,10 @@  static av_cold void uninit(AVFilterContext *context)
         av_freep(&sr_context->dnn_module);
     }
 
-    if (sr_context->sws_context){
-        sws_freeContext(sr_context->sws_context);
+    for (i = 0; i < 3; ++i){
+        if (sr_context->sws_contexts[i]){
+            sws_freeContext(sr_context->sws_contexts[i]);
+        }
     }
 }