diff mbox series

[FFmpeg-devel,v1] lavfi/qsvvpp: support async depth

Message ID 20210127014213.13461-1-fei.w.wang@intel.com
State New
Headers show
Series [FFmpeg-devel,v1] lavfi/qsvvpp: support async depth | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Fei Wang Jan. 27, 2021, 1:42 a.m. UTC
Async depth will allow the qsv filter to cache a few frames, avoiding
forced per-frame switching and ending of filter tasks. This change will
improve performance for some multi-task cases, for example 1:N transcode
(decode + vpp + encode) with all QSV plugins.

Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
 libavfilter/qsvvpp.c             | 147 ++++++++++++++++++-------------
 libavfilter/qsvvpp.h             |  42 ++++++++-
 libavfilter/vf_deinterlace_qsv.c |   8 --
 libavfilter/vf_vpp_qsv.c         |  75 +++++++++++++---
 4 files changed, 187 insertions(+), 85 deletions(-)

Comments

Fei Wang Feb. 3, 2021, 1:08 a.m. UTC | #1
On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote:
> Async depth will allow qsv filter cache few frames, and avoid force
> switch and end filter task frame by frame. This change will improve
> performance for some multi-task case, for example 1:N transcode(
> decode + vpp + encode) with all QSV plugins.
> 
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
>  libavfilter/qsvvpp.c             | 147 ++++++++++++++++++-----------
> --
>  libavfilter/qsvvpp.h             |  42 ++++++++-
>  libavfilter/vf_deinterlace_qsv.c |   8 --
>  libavfilter/vf_vpp_qsv.c         |  75 +++++++++++++---
>  4 files changed, 187 insertions(+), 85 deletions(-)
> 
> diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
> index f216b3f248..2e824e67e7 100644
> --- a/libavfilter/qsvvpp.c
> +++ b/libavfilter/qsvvpp.c
> @@ -27,6 +27,7 @@
>  #include "libavutil/hwcontext_qsv.h"
>  #include "libavutil/time.h"
>  #include "libavutil/pixdesc.h"
> +#include "libavutil/fifo.h"
>  
>  #include "internal.h"
>  #include "qsvvpp.h"
> @@ -37,37 +38,6 @@
>  #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
>  #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
>  
> -typedef struct QSVFrame {
> -    AVFrame          *frame;
> -    mfxFrameSurface1 *surface;
> -    mfxFrameSurface1  surface_internal;  /* for system memory */
> -    struct QSVFrame  *next;
> -} QSVFrame;
> -
> -/* abstract struct for all QSV filters */
> -struct QSVVPPContext {
> -    mfxSession          session;
> -    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/*
> callback */
> -    enum AVPixelFormat  out_sw_format;   /* Real output format */
> -    mfxVideoParam       vpp_param;
> -    mfxFrameInfo       *frame_infos;     /* frame info for each
> input */
> -
> -    /* members related to the input/output surface */
> -    int                 in_mem_mode;
> -    int                 out_mem_mode;
> -    QSVFrame           *in_frame_list;
> -    QSVFrame           *out_frame_list;
> -    int                 nb_surface_ptrs_in;
> -    int                 nb_surface_ptrs_out;
> -    mfxFrameSurface1  **surface_ptrs_in;
> -    mfxFrameSurface1  **surface_ptrs_out;
> -
> -    /* MFXVPP extern parameters */
> -    mfxExtOpaqueSurfaceAlloc opaque_alloc;
> -    mfxExtBuffer      **ext_buffers;
> -    int                 nb_ext_buffers;
> -};
> -
>  static const mfxHandleType handle_types[] = {
>      MFX_HANDLE_VA_DISPLAY,
>      MFX_HANDLE_D3D9_DEVICE_MANAGER,
> @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo
> *frameinfo, AVFilterLink *link)
>  static void clear_unused_frames(QSVFrame *list)
>  {
>      while (list) {
> -        if (list->surface && !list->surface->Data.Locked) {
> -            list->surface = NULL;
> +        if (list->used && !list->queued && !list-
> >surface.Data.Locked) {
>              av_frame_free(&list->frame);
> +            list->used = 0;
>          }
>          list = list->next;
>      }
> @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame **list)
>      QSVFrame *out = *list;
>  
>      for (; out; out = out->next) {
> -        if (!out->surface)
> +        if (!out->used) {
> +            out->used = 1;
>              break;
> +        }
>      }
>  
>      if (!out) {
> @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame **list)
>              av_log(NULL, AV_LOG_ERROR, "Can't alloc new output
> frame.\n");
>              return NULL;
>          }
> +        out->used  = 1;
>          out->next  = *list;
>          *list      = out;
>      }
> @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s,
> AVFilterLink *inlink, AVFrame *p
>              return NULL;
>          }
>          qsv_frame->frame   = av_frame_clone(picref);
> -        qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame-
> >data[3];
> +        qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame-
> >data[3];
>      } else {
>          /* make a copy if the input is not padded as libmfx requires
> */
>          if (picref->height & 31 || picref->linesize[0] & 31) {
> @@ -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s,
> AVFilterLink *inlink, AVFrame *p
>              qsv_frame->frame = av_frame_clone(picref);
>  
>          if (map_frame_to_surface(qsv_frame->frame,
> -                                &qsv_frame->surface_internal) < 0) {
> +                                 &qsv_frame->surface) < 0) {
>              av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
>              return NULL;
>          }
> -        qsv_frame->surface = &qsv_frame->surface_internal;
>      }
>  
> -    qsv_frame->surface->Info           = s-
> >frame_infos[FF_INLINK_IDX(inlink)];
> -    qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame-
> >frame->pts,
> +    qsv_frame->surface.Info           = s-
> >frame_infos[FF_INLINK_IDX(inlink)];
> +    qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame-
> >frame->pts,
>                                                        inlink-
> >time_base, default_tb);
>  
> -    qsv_frame->surface->Info.PicStruct =
> +    qsv_frame->surface.Info.PicStruct =
>              !qsv_frame->frame->interlaced_frame ?
> MFX_PICSTRUCT_PROGRESSIVE :
>              (qsv_frame->frame->top_field_first ?
> MFX_PICSTRUCT_FIELD_TFF :
>                                                   MFX_PICSTRUCT_FIELD
> _BFF);
>      if (qsv_frame->frame->repeat_pict == 1)
> -        qsv_frame->surface->Info.PicStruct |=
> MFX_PICSTRUCT_FIELD_REPEATED;
> +        qsv_frame->surface.Info.PicStruct |=
> MFX_PICSTRUCT_FIELD_REPEATED;
>      else if (qsv_frame->frame->repeat_pict == 2)
> -        qsv_frame->surface->Info.PicStruct |=
> MFX_PICSTRUCT_FRAME_DOUBLING;
> +        qsv_frame->surface.Info.PicStruct |=
> MFX_PICSTRUCT_FRAME_DOUBLING;
>      else if (qsv_frame->frame->repeat_pict == 4)
> -        qsv_frame->surface->Info.PicStruct |=
> MFX_PICSTRUCT_FRAME_TRIPLING;
> +        qsv_frame->surface.Info.PicStruct |=
> MFX_PICSTRUCT_FRAME_TRIPLING;
>  
>      return qsv_frame;
>  }
> @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s,
> AVFilterLink *outlink)
>              return NULL;
>          }
>  
> -        out_frame->surface = (mfxFrameSurface1 *)out_frame->frame-
> >data[3];
> +        out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame-
> >data[3];
>      } else {
>          /* Get a frame with aligned dimensions.
>           * Libmfx need system memory being 128x64 aligned */
> @@ -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext *s,
> AVFilterLink *outlink)
>          out_frame->frame->height = outlink->h;
>  
>          ret = map_frame_to_surface(out_frame->frame,
> -                                  &out_frame->surface_internal);
> +                                   &out_frame->surface);
>          if (ret < 0)
>              return NULL;
> -
> -        out_frame->surface = &out_frame->surface_internal;
>      }
>  
> -    out_frame->surface->Info = s->vpp_param.vpp.Out;
> +    out_frame->surface.Info = s->vpp_param.vpp.Out;
>  
>      return out_frame;
>  }
> @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext
> *avctx, QSVVPPContext *s)
>      return 0;
>  }
>  
> +static unsigned int qsv_fifo_item_size(void)
> +{
> +    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
> +}
> +
> +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
> +{
> +    return  av_fifo_size(fifo)/qsv_fifo_item_size();
> +}
> +
>  int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp,
> QSVVPPParam *param)
>  {
>      int i;
> @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx,
> QSVVPPContext **vpp, QSVVPPParam *p
>          s->vpp_param.ExtParam    = param->ext_buf;
>      }
>  
> -    s->vpp_param.AsyncDepth = 1;
> +    s->got_frame = 0;
> +
> +    /** keep fifo size at least 1. Even when async_depth is 0, fifo
> is used. */
> +    s->async_fifo  = av_fifo_alloc((param->async_depth + 1) *
> qsv_fifo_item_size());
> +    s->async_depth = param->async_depth;
> +    if (!s->async_fifo) {
> +        ret = AVERROR(ENOMEM);
> +        goto failed;
> +    }
> +
> +    s->vpp_param.AsyncDepth = param->async_depth;
>  
>      if (IS_SYSTEM_MEMORY(s->in_mem_mode))
>          s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
> @@ -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp)
>      av_freep(&s->surface_ptrs_out);
>      av_freep(&s->ext_buffers);
>      av_freep(&s->frame_infos);
> +    av_fifo_free(s->async_fifo);
>      av_freep(vpp);
>  
>      return 0;
> @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s,
> AVFilterLink *inlink, AVFrame *picr
>      AVFilterContext  *ctx     = inlink->dst;
>      AVFilterLink     *outlink = ctx->outputs[0];
>      mfxSyncPoint      sync;
> -    QSVFrame         *in_frame, *out_frame;
> +    QSVFrame         *in_frame, *out_frame, *tmp;
>      int               ret, filter_ret;
>  
> +    while (s->eof && qsv_fifo_size(s->async_fifo)) {
> +        av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp),
> NULL);
> +        av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync),
> NULL);
> +        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> +            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> +
> +        filter_ret = s->filter_frame(outlink, tmp->frame);
> +        if (filter_ret < 0) {
> +            av_frame_free(&tmp->frame);
> +            ret = filter_ret;
> +            break;
> +        }
> +        tmp->queued = 0;
> +        s->got_frame = 1;
> +        tmp->frame = NULL;
> +    };
> +
> +    if (!picref)
> +        return 0;
> +
>      in_frame = submit_frame(s, inlink, picref);
>      if (!in_frame) {
>          av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on
> input[%d]\n",
> @@ -821,8 +832,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s,
> AVFilterLink *inlink, AVFrame *picr
>          }
>  
>          do {
> -            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame-
> >surface,
> -                                               out_frame->surface,
> NULL, &sync);
> +            ret = MFXVideoVPP_RunFrameVPPAsync(s->session,
> &in_frame->surface,
> +                                               &out_frame->surface,
> NULL, &sync);
>              if (ret == MFX_WRN_DEVICE_BUSY)
>                  av_usleep(500);
>          } while (ret == MFX_WRN_DEVICE_BUSY);
> @@ -833,20 +844,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s,
> AVFilterLink *inlink, AVFrame *picr
>                  ret = AVERROR(EAGAIN);
>              break;
>          }
> +        out_frame->frame->pts = av_rescale_q(out_frame-
> >surface.Data.TimeStamp,
> +                                             default_tb, outlink-
> >time_base);
>  
> -        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> -            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> +        out_frame->queued = 1;
> +        av_fifo_generic_write(s->async_fifo, &out_frame,
> sizeof(out_frame), NULL);
> +        av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync),
> NULL);
>  
> -        out_frame->frame->pts = av_rescale_q(out_frame->surface-
> >Data.TimeStamp,
> -                                             default_tb, outlink-
> >time_base);
>  
> -        filter_ret = s->filter_frame(outlink, out_frame->frame);
> -        if (filter_ret < 0) {
> -            av_frame_free(&out_frame->frame);
> -            ret = filter_ret;
> -            break;
> +        if (qsv_fifo_size(s->async_fifo) > s->async_depth) {
> +            av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp),
> NULL);
> +            av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync),
> NULL);
> +
> +            if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) <
> 0)
> +                av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> +
> +            filter_ret = s->filter_frame(outlink, tmp->frame);
> +            if (filter_ret < 0) {
> +                av_frame_free(&tmp->frame);
> +                ret = filter_ret;
> +                break;
> +            }
> +
> +            tmp->queued = 0;
> +            s->got_frame = 1;
> +            tmp->frame = NULL;
>          }
> -        out_frame->frame = NULL;
>      } while(ret == MFX_ERR_MORE_SURFACE);
>  
>      return ret;
> diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
> index b4baeedf9e..48c8ffc2d2 100644
> --- a/libavfilter/qsvvpp.h
> +++ b/libavfilter/qsvvpp.h
> @@ -27,6 +27,7 @@
>  #include <mfx/mfxvideo.h>
>  
>  #include "avfilter.h"
> +#include "libavutil/fifo.h"
>  
>  #define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst-
> >input_pads))
>  #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src-
> >output_pads))
> @@ -39,7 +40,44 @@
>      ((MFX_VERSION.Major > (MAJOR)) ||                           \
>      (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR)))
>  
> -typedef struct QSVVPPContext QSVVPPContext;
> +#define VPP_ASYNC_DEPTH_DEFAULT 1
> +
> +typedef struct QSVFrame {
> +    AVFrame          *frame;
> +    mfxFrameSurface1 surface;
> +    struct QSVFrame  *next;
> +    int queued;
> +    int used;
> +} QSVFrame;
> +
> +typedef struct QSVVPPContext {
> +    mfxSession          session;
> +    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);
> /**< callback */
> +    enum AVPixelFormat  out_sw_format;   /**< Real output format */
> +    mfxVideoParam       vpp_param;
> +    mfxFrameInfo       *frame_infos;     /**< frame info for each
> input */
> +
> +    /** members related to the input/output surface */
> +    int                 in_mem_mode;
> +    int                 out_mem_mode;
> +    QSVFrame           *in_frame_list;
> +    QSVFrame           *out_frame_list;
> +    int                 nb_surface_ptrs_in;
> +    int                 nb_surface_ptrs_out;
> +    mfxFrameSurface1  **surface_ptrs_in;
> +    mfxFrameSurface1  **surface_ptrs_out;
> +
> +    /** MFXVPP extern parameters */
> +    mfxExtOpaqueSurfaceAlloc opaque_alloc;
> +    mfxExtBuffer      **ext_buffers;
> +    int                 nb_ext_buffers;
> +
> +    int got_frame;
> +    int async_depth;
> +    int eof;
> +    /** order with frame_out, sync */
> +    AVFifoBuffer *async_fifo;
> +} QSVVPPContext;
>  
>  typedef struct QSVVPPCrop {
>      int in_idx;        ///< Input index
> @@ -60,6 +98,8 @@ typedef struct QSVVPPParam {
>      /* Crop information for each input, if needed */
>      int num_crop;
>      QSVVPPCrop *crop;
> +
> +   int async_depth;
>  } QSVVPPParam;
>  
>  /* create and initialize the QSV session */
> diff --git a/libavfilter/vf_deinterlace_qsv.c
> b/libavfilter/vf_deinterlace_qsv.c
> index 89a282f99e..a620567de2 100644
> --- a/libavfilter/vf_deinterlace_qsv.c
> +++ b/libavfilter/vf_deinterlace_qsv.c
> @@ -47,14 +47,6 @@ enum {
>      QSVDEINT_MORE_INPUT,
>  };
>  
> -typedef struct QSVFrame {
> -    AVFrame *frame;
> -    mfxFrameSurface1 surface;
> -    int used;
> -
> -    struct QSVFrame *next;
> -} QSVFrame;
> -
>  typedef struct QSVDeintContext {
>      const AVClass *class;
>  
> diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
> index 5d57707455..83bdf1276c 100644
> --- a/libavfilter/vf_vpp_qsv.c
> +++ b/libavfilter/vf_vpp_qsv.c
> @@ -32,6 +32,7 @@
>  #include "formats.h"
>  #include "internal.h"
>  #include "avfilter.h"
> +#include "filters.h"
>  #include "libavcodec/avcodec.h"
>  #include "libavformat/avformat.h"
>  
> @@ -93,6 +94,9 @@ typedef struct VPPContext{
>      char *cx, *cy, *cw, *ch;
>      char *ow, *oh;
>      char *output_format_str;
> +
> +    int async_depth;
> +    int eof;
>  } VPPContext;
>  
>  static const AVOption options[] = {
> @@ -128,6 +132,7 @@ static const AVOption options[] = {
>      { "h",      "Output video height", OFFSET(oh),
> AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
>      { "height", "Output video height", OFFSET(oh),
> AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
>      { "format", "Output pixel format", OFFSET(output_format_str),
> AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
> +    { "async_depth", "Internal parallelization depth, the higher the
> value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT,
> { .i64 = VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS },
>  
>      { NULL }
>  };
> @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink)
>      param.filter_frame  = NULL;
>      param.num_ext_buf   = 0;
>      param.ext_buf       = ext_buf;
> +    param.async_depth   = vpp->async_depth;
>  
>      if (inlink->format == AV_PIX_FMT_QSV) {
>           if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data)
> @@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink)
>      return 0;
>  }
>  
> -static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
> +static int activate(AVFilterContext *ctx)
>  {
> -    int              ret = 0;
> -    AVFilterContext  *ctx = inlink->dst;
> -    VPPContext       *vpp = inlink->dst->priv;
> -    AVFilterLink     *outlink = ctx->outputs[0];
> -
> -    if (vpp->qsv) {
> -        ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
> -        av_frame_free(&picref);
> +    AVFilterLink *inlink = ctx->inputs[0];
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    VPPContext *s =ctx->priv;
> +    QSVVPPContext *qsv = s->qsv;
> +    AVFrame *in = NULL;
> +    int ret, status;
> +    int64_t pts;
> +
> +    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
> +
> +    if (!s->eof) {
> +        ret = ff_inlink_consume_frame(inlink, &in);
> +        if (ret < 0)
> +            return ret;
> +
> +        if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> +            if (status == AVERROR_EOF) {
> +                s->eof = 1;
> +            }
> +        }
> +    }
> +
> +    if (qsv) {
> +        if (in || s->eof) {
> +            qsv->eof = s->eof;
> +            ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
> +            av_frame_free(&in);
> +
> +            if (s->eof) {
> +                ff_outlink_set_status(outlink, status, pts);
> +                return 0;
> +            }
> +
> +            if (qsv->got_frame) {
> +                qsv->got_frame = 0;
> +                return ret;
> +            }
> +        }
>      } else {
> -        if (picref->pts != AV_NOPTS_VALUE)
> -            picref->pts = av_rescale_q(picref->pts, inlink-
> >time_base, outlink->time_base);
> -        ret = ff_filter_frame(outlink, picref);
> +        if (in) {
> +            if (in->pts != AV_NOPTS_VALUE)
> +                in->pts = av_rescale_q(in->pts, inlink->time_base,
> outlink->time_base);
> +
> +            ret = ff_filter_frame(outlink, in);
> +            return ret;
> +        }
>      }
>  
> -    return ret;
> +    if (s->eof) {
> +        ff_outlink_set_status(outlink, status, pts);
> +        return 0;
> +    } else {
> +        FF_FILTER_FORWARD_WANTED(outlink, inlink);
> +    }
> +
> +    return FFERROR_NOT_READY;
>  }
>  
>  static int query_formats(AVFilterContext *ctx)
> @@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = {
>          .name          = "default",
>          .type          = AVMEDIA_TYPE_VIDEO,
>          .config_props  = config_input,
> -        .filter_frame  = filter_frame,
>      },
>      { NULL }
>  };
> @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = {
>      .uninit        = vpp_uninit,
>      .inputs        = vpp_inputs,
>      .outputs       = vpp_outputs,
> +    .activate      = activate,
>      .priv_class    = &vpp_class,
>      .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
>  };

Ping, thanks

Fei
Fei Wang Feb. 7, 2021, 6:06 a.m. UTC | #2
> -----Original Message-----
> From: Wang, Fei W <fei.w.wang@intel.com>
> Sent: Wednesday, February 3, 2021 9:09 AM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [PATCH v1] lavfi/qsvvpp: support async depth
> 
> On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote:
> > Async depth will allow qsv filter cache few frames, and avoid force
> > switch and end filter task frame by frame. This change will improve
> > performance for some multi-task case, for example 1:N transcode(
> > decode + vpp + encode) with all QSV plugins.
> >
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> >  libavfilter/qsvvpp.c             | 147 ++++++++++++++++++-----------
> > --
> >  libavfilter/qsvvpp.h             |  42 ++++++++-
> >  libavfilter/vf_deinterlace_qsv.c |   8 --
> >  libavfilter/vf_vpp_qsv.c         |  75 +++++++++++++---
> >  4 files changed, 187 insertions(+), 85 deletions(-)
> >
> > diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index
> > f216b3f248..2e824e67e7 100644
> > --- a/libavfilter/qsvvpp.c
> > +++ b/libavfilter/qsvvpp.c
> > @@ -27,6 +27,7 @@
> >  #include "libavutil/hwcontext_qsv.h"
> >  #include "libavutil/time.h"
> >  #include "libavutil/pixdesc.h"
> > +#include "libavutil/fifo.h"
> >
> >  #include "internal.h"
> >  #include "qsvvpp.h"
> > @@ -37,37 +38,6 @@
> >  #define IS_OPAQUE_MEMORY(mode) (mode &
> MFX_MEMTYPE_OPAQUE_FRAME)
> > #define IS_SYSTEM_MEMORY(mode) (mode &
> MFX_MEMTYPE_SYSTEM_MEMORY)
> >
> > -typedef struct QSVFrame {
> > -    AVFrame          *frame;
> > -    mfxFrameSurface1 *surface;
> > -    mfxFrameSurface1  surface_internal;  /* for system memory */
> > -    struct QSVFrame  *next;
> > -} QSVFrame;
> > -
> > -/* abstract struct for all QSV filters */ -struct QSVVPPContext {
> > -    mfxSession          session;
> > -    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/*
> > callback */
> > -    enum AVPixelFormat  out_sw_format;   /* Real output format */
> > -    mfxVideoParam       vpp_param;
> > -    mfxFrameInfo       *frame_infos;     /* frame info for each
> > input */
> > -
> > -    /* members related to the input/output surface */
> > -    int                 in_mem_mode;
> > -    int                 out_mem_mode;
> > -    QSVFrame           *in_frame_list;
> > -    QSVFrame           *out_frame_list;
> > -    int                 nb_surface_ptrs_in;
> > -    int                 nb_surface_ptrs_out;
> > -    mfxFrameSurface1  **surface_ptrs_in;
> > -    mfxFrameSurface1  **surface_ptrs_out;
> > -
> > -    /* MFXVPP extern parameters */
> > -    mfxExtOpaqueSurfaceAlloc opaque_alloc;
> > -    mfxExtBuffer      **ext_buffers;
> > -    int                 nb_ext_buffers;
> > -};
> > -
> >  static const mfxHandleType handle_types[] = {
> >      MFX_HANDLE_VA_DISPLAY,
> >      MFX_HANDLE_D3D9_DEVICE_MANAGER,
> > @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo
> > *frameinfo, AVFilterLink *link)
> >  static void clear_unused_frames(QSVFrame *list)  {
> >      while (list) {
> > -        if (list->surface && !list->surface->Data.Locked) {
> > -            list->surface = NULL;
> > +        if (list->used && !list->queued && !list-
> > >surface.Data.Locked) {
> >              av_frame_free(&list->frame);
> > +            list->used = 0;
> >          }
> >          list = list->next;
> >      }
> > @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame **list)
> >      QSVFrame *out = *list;
> >
> >      for (; out; out = out->next) {
> > -        if (!out->surface)
> > +        if (!out->used) {
> > +            out->used = 1;
> >              break;
> > +        }
> >      }
> >
> >      if (!out) {
> > @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame **list)
> >              av_log(NULL, AV_LOG_ERROR, "Can't alloc new output
> > frame.\n");
> >              return NULL;
> >          }
> > +        out->used  = 1;
> >          out->next  = *list;
> >          *list      = out;
> >      }
> > @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s,
> > AVFilterLink *inlink, AVFrame *p
> >              return NULL;
> >          }
> >          qsv_frame->frame   = av_frame_clone(picref);
> > -        qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame-
> > >data[3];
> > +        qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame-
> > >data[3];
> >      } else {
> >          /* make a copy if the input is not padded as libmfx requires
> > */
> >          if (picref->height & 31 || picref->linesize[0] & 31) { @@
> > -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s,
> > AVFilterLink *inlink, AVFrame *p
> >              qsv_frame->frame = av_frame_clone(picref);
> >
> >          if (map_frame_to_surface(qsv_frame->frame,
> > -                                &qsv_frame->surface_internal) < 0) {
> > +                                 &qsv_frame->surface) < 0) {
> >              av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
> >              return NULL;
> >          }
> > -        qsv_frame->surface = &qsv_frame->surface_internal;
> >      }
> >
> > -    qsv_frame->surface->Info           = s-
> > >frame_infos[FF_INLINK_IDX(inlink)];
> > -    qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame-
> > >frame->pts,
> > +    qsv_frame->surface.Info           = s-
> > >frame_infos[FF_INLINK_IDX(inlink)];
> > +    qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame-
> > >frame->pts,
> >                                                        inlink-
> > >time_base, default_tb);
> >
> > -    qsv_frame->surface->Info.PicStruct =
> > +    qsv_frame->surface.Info.PicStruct =
> >              !qsv_frame->frame->interlaced_frame ?
> > MFX_PICSTRUCT_PROGRESSIVE :
> >              (qsv_frame->frame->top_field_first ?
> > MFX_PICSTRUCT_FIELD_TFF :
> >                                                   MFX_PICSTRUCT_FIELD
> > _BFF);
> >      if (qsv_frame->frame->repeat_pict == 1)
> > -        qsv_frame->surface->Info.PicStruct |=
> > MFX_PICSTRUCT_FIELD_REPEATED;
> > +        qsv_frame->surface.Info.PicStruct |=
> > MFX_PICSTRUCT_FIELD_REPEATED;
> >      else if (qsv_frame->frame->repeat_pict == 2)
> > -        qsv_frame->surface->Info.PicStruct |=
> > MFX_PICSTRUCT_FRAME_DOUBLING;
> > +        qsv_frame->surface.Info.PicStruct |=
> > MFX_PICSTRUCT_FRAME_DOUBLING;
> >      else if (qsv_frame->frame->repeat_pict == 4)
> > -        qsv_frame->surface->Info.PicStruct |=
> > MFX_PICSTRUCT_FRAME_TRIPLING;
> > +        qsv_frame->surface.Info.PicStruct |=
> > MFX_PICSTRUCT_FRAME_TRIPLING;
> >
> >      return qsv_frame;
> >  }
> > @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s,
> > AVFilterLink *outlink)
> >              return NULL;
> >          }
> >
> > -        out_frame->surface = (mfxFrameSurface1 *)out_frame->frame-
> > >data[3];
> > +        out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame-
> > >data[3];
> >      } else {
> >          /* Get a frame with aligned dimensions.
> >           * Libmfx need system memory being 128x64 aligned */ @@
> > -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext *s,
> > AVFilterLink *outlink)
> >          out_frame->frame->height = outlink->h;
> >
> >          ret = map_frame_to_surface(out_frame->frame,
> > -                                  &out_frame->surface_internal);
> > +                                   &out_frame->surface);
> >          if (ret < 0)
> >              return NULL;
> > -
> > -        out_frame->surface = &out_frame->surface_internal;
> >      }
> >
> > -    out_frame->surface->Info = s->vpp_param.vpp.Out;
> > +    out_frame->surface.Info = s->vpp_param.vpp.Out;
> >
> >      return out_frame;
> >  }
> > @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext
> > *avctx, QSVVPPContext *s)
> >      return 0;
> >  }
> >
> > +static unsigned int qsv_fifo_item_size(void) {
> > +    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*); }
> > +
> > +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo) {
> > +    return  av_fifo_size(fifo)/qsv_fifo_item_size();
> > +}
> > +
> >  int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp,
> > QSVVPPParam *param)  {
> >      int i;
> > @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx,
> > QSVVPPContext **vpp, QSVVPPParam *p
> >          s->vpp_param.ExtParam    = param->ext_buf;
> >      }
> >
> > -    s->vpp_param.AsyncDepth = 1;
> > +    s->got_frame = 0;
> > +
> > +    /** keep fifo size at least 1. Even when async_depth is 0, fifo
> > is used. */
> > +    s->async_fifo  = av_fifo_alloc((param->async_depth + 1) *
> > qsv_fifo_item_size());
> > +    s->async_depth = param->async_depth;
> > +    if (!s->async_fifo) {
> > +        ret = AVERROR(ENOMEM);
> > +        goto failed;
> > +    }
> > +
> > +    s->vpp_param.AsyncDepth = param->async_depth;
> >
> >      if (IS_SYSTEM_MEMORY(s->in_mem_mode))
> >          s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
> @@
> > -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp)
> >      av_freep(&s->surface_ptrs_out);
> >      av_freep(&s->ext_buffers);
> >      av_freep(&s->frame_infos);
> > +    av_fifo_free(s->async_fifo);
> >      av_freep(vpp);
> >
> >      return 0;
> > @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s,
> > AVFilterLink *inlink, AVFrame *picr
> >      AVFilterContext  *ctx     = inlink->dst;
> >      AVFilterLink     *outlink = ctx->outputs[0];
> >      mfxSyncPoint      sync;
> > -    QSVFrame         *in_frame, *out_frame;
> > +    QSVFrame         *in_frame, *out_frame, *tmp;
> >      int               ret, filter_ret;
> >
> > +    while (s->eof && qsv_fifo_size(s->async_fifo)) {
> > +        av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp),
> > NULL);
> > +        av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync),
> > NULL);
> > +        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> > +            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > +
> > +        filter_ret = s->filter_frame(outlink, tmp->frame);
> > +        if (filter_ret < 0) {
> > +            av_frame_free(&tmp->frame);
> > +            ret = filter_ret;
> > +            break;
> > +        }
> > +        tmp->queued = 0;
> > +        s->got_frame = 1;
> > +        tmp->frame = NULL;
> > +    };
> > +
> > +    if (!picref)
> > +        return 0;
> > +
> >      in_frame = submit_frame(s, inlink, picref);
> >      if (!in_frame) {
> >          av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on
> > input[%d]\n", @@ -821,8 +832,8 @@ int
> > ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame
> > *picr
> >          }
> >
> >          do {
> > -            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame-
> > >surface,
> > -                                               out_frame->surface,
> > NULL, &sync);
> > +            ret = MFXVideoVPP_RunFrameVPPAsync(s->session,
> > &in_frame->surface,
> > +                                               &out_frame->surface,
> > NULL, &sync);
> >              if (ret == MFX_WRN_DEVICE_BUSY)
> >                  av_usleep(500);
> >          } while (ret == MFX_WRN_DEVICE_BUSY); @@ -833,20 +844,32 @@
> > int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink,
> > AVFrame *picr
> >                  ret = AVERROR(EAGAIN);
> >              break;
> >          }
> > +        out_frame->frame->pts = av_rescale_q(out_frame-
> > >surface.Data.TimeStamp,
> > +                                             default_tb, outlink-
> > >time_base);
> >
> > -        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> > -            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > +        out_frame->queued = 1;
> > +        av_fifo_generic_write(s->async_fifo, &out_frame,
> > sizeof(out_frame), NULL);
> > +        av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync),
> > NULL);
> >
> > -        out_frame->frame->pts = av_rescale_q(out_frame->surface-
> > >Data.TimeStamp,
> > -                                             default_tb, outlink-
> > >time_base);
> >
> > -        filter_ret = s->filter_frame(outlink, out_frame->frame);
> > -        if (filter_ret < 0) {
> > -            av_frame_free(&out_frame->frame);
> > -            ret = filter_ret;
> > -            break;
> > +        if (qsv_fifo_size(s->async_fifo) > s->async_depth) {
> > +            av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp),
> > NULL);
> > +            av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync),
> > NULL);
> > +
> > +            if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) <
> > 0)
> > +                av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > +
> > +            filter_ret = s->filter_frame(outlink, tmp->frame);
> > +            if (filter_ret < 0) {
> > +                av_frame_free(&tmp->frame);
> > +                ret = filter_ret;
> > +                break;
> > +            }
> > +
> > +            tmp->queued = 0;
> > +            s->got_frame = 1;
> > +            tmp->frame = NULL;
> >          }
> > -        out_frame->frame = NULL;
> >      } while(ret == MFX_ERR_MORE_SURFACE);
> >
> >      return ret;
> > diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h index
> > b4baeedf9e..48c8ffc2d2 100644
> > --- a/libavfilter/qsvvpp.h
> > +++ b/libavfilter/qsvvpp.h
> > @@ -27,6 +27,7 @@
> >  #include <mfx/mfxvideo.h>
> >
> >  #include "avfilter.h"
> > +#include "libavutil/fifo.h"
> >
> >  #define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst-
> > >input_pads))
> >  #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src-
> > >output_pads))
> > @@ -39,7 +40,44 @@
> >      ((MFX_VERSION.Major > (MAJOR)) ||                           \
> >      (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR)))
> >
> > -typedef struct QSVVPPContext QSVVPPContext;
> > +#define VPP_ASYNC_DEPTH_DEFAULT 1
> > +
> > +typedef struct QSVFrame {
> > +    AVFrame          *frame;
> > +    mfxFrameSurface1 surface;
> > +    struct QSVFrame  *next;
> > +    int queued;
> > +    int used;
> > +} QSVFrame;
> > +
> > +typedef struct QSVVPPContext {
> > +    mfxSession          session;
> > +    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);
> > /**< callback */
> > +    enum AVPixelFormat  out_sw_format;   /**< Real output format */
> > +    mfxVideoParam       vpp_param;
> > +    mfxFrameInfo       *frame_infos;     /**< frame info for each
> > input */
> > +
> > +    /** members related to the input/output surface */
> > +    int                 in_mem_mode;
> > +    int                 out_mem_mode;
> > +    QSVFrame           *in_frame_list;
> > +    QSVFrame           *out_frame_list;
> > +    int                 nb_surface_ptrs_in;
> > +    int                 nb_surface_ptrs_out;
> > +    mfxFrameSurface1  **surface_ptrs_in;
> > +    mfxFrameSurface1  **surface_ptrs_out;
> > +
> > +    /** MFXVPP extern parameters */
> > +    mfxExtOpaqueSurfaceAlloc opaque_alloc;
> > +    mfxExtBuffer      **ext_buffers;
> > +    int                 nb_ext_buffers;
> > +
> > +    int got_frame;
> > +    int async_depth;
> > +    int eof;
> > +    /** order with frame_out, sync */
> > +    AVFifoBuffer *async_fifo;
> > +} QSVVPPContext;
> >
> >  typedef struct QSVVPPCrop {
> >      int in_idx;        ///< Input index
> > @@ -60,6 +98,8 @@ typedef struct QSVVPPParam {
> >      /* Crop information for each input, if needed */
> >      int num_crop;
> >      QSVVPPCrop *crop;
> > +
> > +   int async_depth;
> >  } QSVVPPParam;
> >
> >  /* create and initialize the QSV session */ diff --git
> > a/libavfilter/vf_deinterlace_qsv.c
> > b/libavfilter/vf_deinterlace_qsv.c
> > index 89a282f99e..a620567de2 100644
> > --- a/libavfilter/vf_deinterlace_qsv.c
> > +++ b/libavfilter/vf_deinterlace_qsv.c
> > @@ -47,14 +47,6 @@ enum {
> >      QSVDEINT_MORE_INPUT,
> >  };
> >
> > -typedef struct QSVFrame {
> > -    AVFrame *frame;
> > -    mfxFrameSurface1 surface;
> > -    int used;
> > -
> > -    struct QSVFrame *next;
> > -} QSVFrame;
> > -
> >  typedef struct QSVDeintContext {
> >      const AVClass *class;
> >
> > diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c index
> > 5d57707455..83bdf1276c 100644
> > --- a/libavfilter/vf_vpp_qsv.c
> > +++ b/libavfilter/vf_vpp_qsv.c
> > @@ -32,6 +32,7 @@
> >  #include "formats.h"
> >  #include "internal.h"
> >  #include "avfilter.h"
> > +#include "filters.h"
> >  #include "libavcodec/avcodec.h"
> >  #include "libavformat/avformat.h"
> >
> > @@ -93,6 +94,9 @@ typedef struct VPPContext{
> >      char *cx, *cy, *cw, *ch;
> >      char *ow, *oh;
> >      char *output_format_str;
> > +
> > +    int async_depth;
> > +    int eof;
> >  } VPPContext;
> >
> >  static const AVOption options[] = {
> > @@ -128,6 +132,7 @@ static const AVOption options[] = {
> >      { "h",      "Output video height", OFFSET(oh),
> > AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
> >      { "height", "Output video height", OFFSET(oh),
> > AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
> >      { "format", "Output pixel format", OFFSET(output_format_str),
> > AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
> > +    { "async_depth", "Internal parallelization depth, the higher the
> > value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT,
> > { .i64 = VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS },
> >
> >      { NULL }
> >  };
> > @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink)
> >      param.filter_frame  = NULL;
> >      param.num_ext_buf   = 0;
> >      param.ext_buf       = ext_buf;
> > +    param.async_depth   = vpp->async_depth;
> >
> >      if (inlink->format == AV_PIX_FMT_QSV) {
> >           if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data)
> > @@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink)
> >      return 0;
> >  }
> >
> > -static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
> > +static int activate(AVFilterContext *ctx)
> >  {
> > -    int              ret = 0;
> > -    AVFilterContext  *ctx = inlink->dst;
> > -    VPPContext       *vpp = inlink->dst->priv;
> > -    AVFilterLink     *outlink = ctx->outputs[0];
> > -
> > -    if (vpp->qsv) {
> > -        ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
> > -        av_frame_free(&picref);
> > +    AVFilterLink *inlink = ctx->inputs[0];
> > +    AVFilterLink *outlink = ctx->outputs[0];
> > +    VPPContext *s =ctx->priv;
> > +    QSVVPPContext *qsv = s->qsv;
> > +    AVFrame *in = NULL;
> > +    int ret, status;
> > +    int64_t pts;
> > +
> > +    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
> > +
> > +    if (!s->eof) {
> > +        ret = ff_inlink_consume_frame(inlink, &in);
> > +        if (ret < 0)
> > +            return ret;
> > +
> > +        if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> > +            if (status == AVERROR_EOF) {
> > +                s->eof = 1;
> > +            }
> > +        }
> > +    }
> > +
> > +    if (qsv) {
> > +        if (in || s->eof) {
> > +            qsv->eof = s->eof;
> > +            ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
> > +            av_frame_free(&in);
> > +
> > +            if (s->eof) {
> > +                ff_outlink_set_status(outlink, status, pts);
> > +                return 0;
> > +            }
> > +
> > +            if (qsv->got_frame) {
> > +                qsv->got_frame = 0;
> > +                return ret;
> > +            }
> > +        }
> >      } else {
> > -        if (picref->pts != AV_NOPTS_VALUE)
> > -            picref->pts = av_rescale_q(picref->pts, inlink-
> > >time_base, outlink->time_base);
> > -        ret = ff_filter_frame(outlink, picref);
> > +        if (in) {
> > +            if (in->pts != AV_NOPTS_VALUE)
> > +                in->pts = av_rescale_q(in->pts, inlink->time_base,
> > outlink->time_base);
> > +
> > +            ret = ff_filter_frame(outlink, in);
> > +            return ret;
> > +        }
> >      }
> >
> > -    return ret;
> > +    if (s->eof) {
> > +        ff_outlink_set_status(outlink, status, pts);
> > +        return 0;
> > +    } else {
> > +        FF_FILTER_FORWARD_WANTED(outlink, inlink);
> > +    }
> > +
> > +    return FFERROR_NOT_READY;
> >  }
> >
> >  static int query_formats(AVFilterContext *ctx) @@ -531,7 +578,6 @@
> > static const AVFilterPad vpp_inputs[] = {
> >          .name          = "default",
> >          .type          = AVMEDIA_TYPE_VIDEO,
> >          .config_props  = config_input,
> > -        .filter_frame  = filter_frame,
> >      },
> >      { NULL }
> >  };
> > @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = {
> >      .uninit        = vpp_uninit,
> >      .inputs        = vpp_inputs,
> >      .outputs       = vpp_outputs,
> > +    .activate      = activate,
> >      .priv_class    = &vpp_class,
> >      .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,  };
> 
> Ping, thanks

Ping for review. @Mark Thompson @lizhong1008@gmail.com, would you be free to review this patch?

> 
> Fei
Xiang, Haihao March 12, 2021, 6:20 a.m. UTC | #3
On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote:
> Async depth will allow qsv filter cache few frames, and avoid force
> switch and end filter task frame by frame. This change will improve
> performance for some multi-task case, for example 1:N transcode(
> decode + vpp + encode) with all QSV plugins.
> 
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
>  libavfilter/qsvvpp.c             | 147 ++++++++++++++++++-------------
>  libavfilter/qsvvpp.h             |  42 ++++++++-
>  libavfilter/vf_deinterlace_qsv.c |   8 --
>  libavfilter/vf_vpp_qsv.c         |  75 +++++++++++++---
>  4 files changed, 187 insertions(+), 85 deletions(-)
> 
> diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
> index f216b3f248..2e824e67e7 100644
> --- a/libavfilter/qsvvpp.c
> +++ b/libavfilter/qsvvpp.c
> @@ -27,6 +27,7 @@
>  #include "libavutil/hwcontext_qsv.h"
>  #include "libavutil/time.h"
>  #include "libavutil/pixdesc.h"
> +#include "libavutil/fifo.h"
>  
>  #include "internal.h"
>  #include "qsvvpp.h"
> @@ -37,37 +38,6 @@
>  #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
>  #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
>  
> -typedef struct QSVFrame {
> -    AVFrame          *frame;
> -    mfxFrameSurface1 *surface;
> -    mfxFrameSurface1  surface_internal;  /* for system memory */
> -    struct QSVFrame  *next;
> -} QSVFrame;
> -
> -/* abstract struct for all QSV filters */
> -struct QSVVPPContext {
> -    mfxSession          session;
> -    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback
> */
> -    enum AVPixelFormat  out_sw_format;   /* Real output format */
> -    mfxVideoParam       vpp_param;
> -    mfxFrameInfo       *frame_infos;     /* frame info for each input */
> -
> -    /* members related to the input/output surface */
> -    int                 in_mem_mode;
> -    int                 out_mem_mode;
> -    QSVFrame           *in_frame_list;
> -    QSVFrame           *out_frame_list;
> -    int                 nb_surface_ptrs_in;
> -    int                 nb_surface_ptrs_out;
> -    mfxFrameSurface1  **surface_ptrs_in;
> -    mfxFrameSurface1  **surface_ptrs_out;
> -
> -    /* MFXVPP extern parameters */
> -    mfxExtOpaqueSurfaceAlloc opaque_alloc;
> -    mfxExtBuffer      **ext_buffers;
> -    int                 nb_ext_buffers;
> -};
> -
>  static const mfxHandleType handle_types[] = {
>      MFX_HANDLE_VA_DISPLAY,
>      MFX_HANDLE_D3D9_DEVICE_MANAGER,
> @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo,
> AVFilterLink *link)
>  static void clear_unused_frames(QSVFrame *list)
>  {
>      while (list) {
> -        if (list->surface && !list->surface->Data.Locked) {
> -            list->surface = NULL;
> +        if (list->used && !list->queued && !list->surface.Data.Locked) {
>              av_frame_free(&list->frame);
> +            list->used = 0;
>          }
>          list = list->next;
>      }
> @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame **list)
>      QSVFrame *out = *list;
>  
>      for (; out; out = out->next) {
> -        if (!out->surface)
> +        if (!out->used) {
> +            out->used = 1;
>              break;
> +        }
>      }
>  
>      if (!out) {
> @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame **list)
>              av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n");
>              return NULL;
>          }
> +        out->used  = 1;
>          out->next  = *list;
>          *list      = out;
>      }
> @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s,
> AVFilterLink *inlink, AVFrame *p
>              return NULL;
>          }
>          qsv_frame->frame   = av_frame_clone(picref);
> -        qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
> +        qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3];
>      } else {
>          /* make a copy if the input is not padded as libmfx requires */
>          if (picref->height & 31 || picref->linesize[0] & 31) {
> @@ -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s,
> AVFilterLink *inlink, AVFrame *p
>              qsv_frame->frame = av_frame_clone(picref);
>  
>          if (map_frame_to_surface(qsv_frame->frame,
> -                                &qsv_frame->surface_internal) < 0) {
> +                                 &qsv_frame->surface) < 0) {
>              av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
>              return NULL;
>          }
> -        qsv_frame->surface = &qsv_frame->surface_internal;
>      }
>  
> -    qsv_frame->surface->Info           = s-
> >frame_infos[FF_INLINK_IDX(inlink)];
> -    qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
> +    qsv_frame->surface.Info           = s-
> >frame_infos[FF_INLINK_IDX(inlink)];
> +    qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
>                                                        inlink->time_base,
> default_tb);
>  
> -    qsv_frame->surface->Info.PicStruct =
> +    qsv_frame->surface.Info.PicStruct =
>              !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
>              (qsv_frame->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
>                                                   MFX_PICSTRUCT_FIELD_BFF);
>      if (qsv_frame->frame->repeat_pict == 1)
> -        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
> +        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
>      else if (qsv_frame->frame->repeat_pict == 2)
> -        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
> +        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
>      else if (qsv_frame->frame->repeat_pict == 4)
> -        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
> +        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
>  
>      return qsv_frame;
>  }
> @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s,
> AVFilterLink *outlink)
>              return NULL;
>          }
>  
> -        out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3];
> +        out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3];
>      } else {
>          /* Get a frame with aligned dimensions.
>           * Libmfx need system memory being 128x64 aligned */
> @@ -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext *s,
> AVFilterLink *outlink)
>          out_frame->frame->height = outlink->h;
>  
>          ret = map_frame_to_surface(out_frame->frame,
> -                                  &out_frame->surface_internal);
> +                                   &out_frame->surface);
>          if (ret < 0)
>              return NULL;
> -
> -        out_frame->surface = &out_frame->surface_internal;
>      }
>  
> -    out_frame->surface->Info = s->vpp_param.vpp.Out;
> +    out_frame->surface.Info = s->vpp_param.vpp.Out;
>  
>      return out_frame;
>  }
> @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext *avctx,
> QSVVPPContext *s)
>      return 0;
>  }
>  
> +static unsigned int qsv_fifo_item_size(void)
> +{
> +    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
> +}
> +
> +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
> +{
> +    return  av_fifo_size(fifo)/qsv_fifo_item_size();
> +}
> +
>  int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam
> *param)
>  {
>      int i;
> @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx,
> QSVVPPContext **vpp, QSVVPPParam *p
>          s->vpp_param.ExtParam    = param->ext_buf;
>      }
>  
> -    s->vpp_param.AsyncDepth = 1;
> +    s->got_frame = 0;
> +
> +    /** keep fifo size at least 1. Even when async_depth is 0, fifo is used.
> */
> +    s->async_fifo  = av_fifo_alloc((param->async_depth + 1) *
> qsv_fifo_item_size());
> +    s->async_depth = param->async_depth;
> +    if (!s->async_fifo) {
> +        ret = AVERROR(ENOMEM);
> +        goto failed;
> +    }
> +
> +    s->vpp_param.AsyncDepth = param->async_depth;
>  
>      if (IS_SYSTEM_MEMORY(s->in_mem_mode))
>          s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
> @@ -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp)
>      av_freep(&s->surface_ptrs_out);
>      av_freep(&s->ext_buffers);
>      av_freep(&s->frame_infos);
> +    av_fifo_free(s->async_fifo);
>      av_freep(vpp);
>  
>      return 0;
> @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink
> *inlink, AVFrame *picr
>      AVFilterContext  *ctx     = inlink->dst;
>      AVFilterLink     *outlink = ctx->outputs[0];
>      mfxSyncPoint      sync;
> -    QSVFrame         *in_frame, *out_frame;
> +    QSVFrame         *in_frame, *out_frame, *tmp;
>      int               ret, filter_ret;
>  
> +    while (s->eof && qsv_fifo_size(s->async_fifo)) {
> +        av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
> +        av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
> +        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> +            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> +
> +        filter_ret = s->filter_frame(outlink, tmp->frame);
> +        if (filter_ret < 0) {
> +            av_frame_free(&tmp->frame);
> +            ret = filter_ret;
> +            break;
> +        }
> +        tmp->queued = 0;
> +        s->got_frame = 1;
> +        tmp->frame = NULL;
> +    };
> +
> +    if (!picref)
> +        return 0;
> +
>      in_frame = submit_frame(s, inlink, picref);
>      if (!in_frame) {
>          av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input[%d]\n",
> @@ -821,8 +832,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink
> *inlink, AVFrame *picr
>          }
>  
>          do {
> -            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface,
> -                                               out_frame->surface, NULL,
> &sync);
> +            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame-
> >surface,
> +                                               &out_frame->surface, NULL,
> &sync);
>              if (ret == MFX_WRN_DEVICE_BUSY)
>                  av_usleep(500);
>          } while (ret == MFX_WRN_DEVICE_BUSY);
> @@ -833,20 +844,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s,
> AVFilterLink *inlink, AVFrame *picr
>                  ret = AVERROR(EAGAIN);
>              break;
>          }
> +        out_frame->frame->pts = av_rescale_q(out_frame-
> >surface.Data.TimeStamp,
> +                                             default_tb, outlink->time_base);
>  
> -        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> -            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> +        out_frame->queued = 1;
> +        av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame),
> NULL);
> +        av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL);
>  
> -        out_frame->frame->pts = av_rescale_q(out_frame->surface-
> >Data.TimeStamp,
> -                                             default_tb, outlink->time_base);
>  
> -        filter_ret = s->filter_frame(outlink, out_frame->frame);
> -        if (filter_ret < 0) {
> -            av_frame_free(&out_frame->frame);
> -            ret = filter_ret;
> -            break;
> +        if (qsv_fifo_size(s->async_fifo) > s->async_depth) {
> +            av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
> +            av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
> +
> +            if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> +                av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> +
> +            filter_ret = s->filter_frame(outlink, tmp->frame);
> +            if (filter_ret < 0) {
> +                av_frame_free(&tmp->frame);
> +                ret = filter_ret;
> +                break;
> +            }
> +
> +            tmp->queued = 0;
> +            s->got_frame = 1;
> +            tmp->frame = NULL;
>          }
> -        out_frame->frame = NULL;
>      } while(ret == MFX_ERR_MORE_SURFACE);
>  
>      return ret;
> diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
> index b4baeedf9e..48c8ffc2d2 100644
> --- a/libavfilter/qsvvpp.h
> +++ b/libavfilter/qsvvpp.h
> @@ -27,6 +27,7 @@
>  #include <mfx/mfxvideo.h>
>  
>  #include "avfilter.h"
> +#include "libavutil/fifo.h"
>  
>  #define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst-
> >input_pads))
>  #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src-
> >output_pads))
> @@ -39,7 +40,44 @@
>      ((MFX_VERSION.Major > (MAJOR)) ||                           \
>      (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR)))
>  
> -typedef struct QSVVPPContext QSVVPPContext;
> +#define VPP_ASYNC_DEPTH_DEFAULT 1
> +
> +typedef struct QSVFrame {
> +    AVFrame          *frame;
> +    mfxFrameSurface1 surface;
> +    struct QSVFrame  *next;
> +    int queued;
> +    int used;

Could we use a queue count, as wenbin did for the qsv decoder in 
http://ffmpeg.org/pipermail/ffmpeg-devel/2021-March/277633.html ? If so, I think
we would not need the 'used' member and the logic would be simpler. 

Thanks
Haihao 


> +} QSVFrame;
> +
> +typedef struct QSVVPPContext {
> +    mfxSession          session;
> +    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**<
> callback */
> +    enum AVPixelFormat  out_sw_format;   /**< Real output format */
> +    mfxVideoParam       vpp_param;
> +    mfxFrameInfo       *frame_infos;     /**< frame info for each input */
> +
> +    /** members related to the input/output surface */
> +    int                 in_mem_mode;
> +    int                 out_mem_mode;
> +    QSVFrame           *in_frame_list;
> +    QSVFrame           *out_frame_list;
> +    int                 nb_surface_ptrs_in;
> +    int                 nb_surface_ptrs_out;
> +    mfxFrameSurface1  **surface_ptrs_in;
> +    mfxFrameSurface1  **surface_ptrs_out;
> +
> +    /** MFXVPP extern parameters */
> +    mfxExtOpaqueSurfaceAlloc opaque_alloc;
> +    mfxExtBuffer      **ext_buffers;
> +    int                 nb_ext_buffers;
> +
> +    int got_frame;
> +    int async_depth;
> +    int eof;
> +    /** order with frame_out, sync */
> +    AVFifoBuffer *async_fifo;
> +} QSVVPPContext;
>  
>  typedef struct QSVVPPCrop {
>      int in_idx;        ///< Input index
> @@ -60,6 +98,8 @@ typedef struct QSVVPPParam {
>      /* Crop information for each input, if needed */
>      int num_crop;
>      QSVVPPCrop *crop;
> +
> +   int async_depth;
>  } QSVVPPParam;
>  
>  /* create and initialize the QSV session */
> diff --git a/libavfilter/vf_deinterlace_qsv.c
> b/libavfilter/vf_deinterlace_qsv.c
> index 89a282f99e..a620567de2 100644
> --- a/libavfilter/vf_deinterlace_qsv.c
> +++ b/libavfilter/vf_deinterlace_qsv.c
> @@ -47,14 +47,6 @@ enum {
>      QSVDEINT_MORE_INPUT,
>  };
>  
> -typedef struct QSVFrame {
> -    AVFrame *frame;
> -    mfxFrameSurface1 surface;
> -    int used;
> -
> -    struct QSVFrame *next;
> -} QSVFrame;
> -
>  typedef struct QSVDeintContext {
>      const AVClass *class;
>  
> diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
> index 5d57707455..83bdf1276c 100644
> --- a/libavfilter/vf_vpp_qsv.c
> +++ b/libavfilter/vf_vpp_qsv.c
> @@ -32,6 +32,7 @@
>  #include "formats.h"
>  #include "internal.h"
>  #include "avfilter.h"
> +#include "filters.h"
>  #include "libavcodec/avcodec.h"
>  #include "libavformat/avformat.h"
>  
> @@ -93,6 +94,9 @@ typedef struct VPPContext{
>      char *cx, *cy, *cw, *ch;
>      char *ow, *oh;
>      char *output_format_str;
> +
> +    int async_depth;
> +    int eof;
>  } VPPContext;
>  
>  static const AVOption options[] = {
> @@ -128,6 +132,7 @@ static const AVOption options[] = {
>      { "h",      "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, {
> .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
>      { "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, {
> .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
>      { "format", "Output pixel format", OFFSET(output_format_str),
> AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
> +    { "async_depth", "Internal parallelization depth, the higher the value
> the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 =
> VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS },
>  
>      { NULL }
>  };
> @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink)
>      param.filter_frame  = NULL;
>      param.num_ext_buf   = 0;
>      param.ext_buf       = ext_buf;
> +    param.async_depth   = vpp->async_depth;
>  
>      if (inlink->format == AV_PIX_FMT_QSV) {
>           if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data)
> @@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink)
>      return 0;
>  }
>  
> -static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
> +static int activate(AVFilterContext *ctx)
>  {
> -    int              ret = 0;
> -    AVFilterContext  *ctx = inlink->dst;
> -    VPPContext       *vpp = inlink->dst->priv;
> -    AVFilterLink     *outlink = ctx->outputs[0];
> -
> -    if (vpp->qsv) {
> -        ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
> -        av_frame_free(&picref);
> +    AVFilterLink *inlink = ctx->inputs[0];
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    VPPContext *s =ctx->priv;
> +    QSVVPPContext *qsv = s->qsv;
> +    AVFrame *in = NULL;
> +    int ret, status;
> +    int64_t pts;
> +
> +    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
> +
> +    if (!s->eof) {
> +        ret = ff_inlink_consume_frame(inlink, &in);
> +        if (ret < 0)
> +            return ret;
> +
> +        if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> +            if (status == AVERROR_EOF) {
> +                s->eof = 1;
> +            }
> +        }
> +    }
> +
> +    if (qsv) {
> +        if (in || s->eof) {
> +            qsv->eof = s->eof;
> +            ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
> +            av_frame_free(&in);
> +
> +            if (s->eof) {
> +                ff_outlink_set_status(outlink, status, pts);
> +                return 0;
> +            }
> +
> +            if (qsv->got_frame) {
> +                qsv->got_frame = 0;
> +                return ret;
> +            }
> +        }
>      } else {
> -        if (picref->pts != AV_NOPTS_VALUE)
> -            picref->pts = av_rescale_q(picref->pts, inlink->time_base,
> outlink->time_base);
> -        ret = ff_filter_frame(outlink, picref);
> +        if (in) {
> +            if (in->pts != AV_NOPTS_VALUE)
> +                in->pts = av_rescale_q(in->pts, inlink->time_base, outlink-
> >time_base);
> +
> +            ret = ff_filter_frame(outlink, in);
> +            return ret;
> +        }
>      }
>  
> -    return ret;
> +    if (s->eof) {
> +        ff_outlink_set_status(outlink, status, pts);
> +        return 0;
> +    } else {
> +        FF_FILTER_FORWARD_WANTED(outlink, inlink);
> +    }
> +
> +    return FFERROR_NOT_READY;
>  }
>  
>  static int query_formats(AVFilterContext *ctx)
> @@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = {
>          .name          = "default",
>          .type          = AVMEDIA_TYPE_VIDEO,
>          .config_props  = config_input,
> -        .filter_frame  = filter_frame,
>      },
>      { NULL }
>  };
> @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = {
>      .uninit        = vpp_uninit,
>      .inputs        = vpp_inputs,
>      .outputs       = vpp_outputs,
> +    .activate      = activate,
>      .priv_class    = &vpp_class,
>      .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
>  };
Fei Wang March 12, 2021, 8:04 a.m. UTC | #4
On Fri, 2021-03-12 at 06:20 +0000, Xiang, Haihao wrote:
> On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote:
> > Async depth will allow qsv filter cache few frames, and avoid force
> > switch and end filter task frame by frame. This change will improve
> > performance for some multi-task case, for example 1:N transcode(
> > decode + vpp + encode) with all QSV plugins.
> > 
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> >  libavfilter/qsvvpp.c             | 147 ++++++++++++++++++---------
> > ----
> >  libavfilter/qsvvpp.h             |  42 ++++++++-
> >  libavfilter/vf_deinterlace_qsv.c |   8 --
> >  libavfilter/vf_vpp_qsv.c         |  75 +++++++++++++---
> >  4 files changed, 187 insertions(+), 85 deletions(-)
> > 
> > diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
> > index f216b3f248..2e824e67e7 100644
> > --- a/libavfilter/qsvvpp.c
> > +++ b/libavfilter/qsvvpp.c
> > @@ -27,6 +27,7 @@
> >  #include "libavutil/hwcontext_qsv.h"
> >  #include "libavutil/time.h"
> >  #include "libavutil/pixdesc.h"
> > +#include "libavutil/fifo.h"
> >  
> >  #include "internal.h"
> >  #include "qsvvpp.h"
> > @@ -37,37 +38,6 @@
> >  #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
> >  #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
> >  
> > -typedef struct QSVFrame {
> > -    AVFrame          *frame;
> > -    mfxFrameSurface1 *surface;
> > -    mfxFrameSurface1  surface_internal;  /* for system memory */
> > -    struct QSVFrame  *next;
> > -} QSVFrame;
> > -
> > -/* abstract struct for all QSV filters */
> > -struct QSVVPPContext {
> > -    mfxSession          session;
> > -    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/*
> > callback
> > */
> > -    enum AVPixelFormat  out_sw_format;   /* Real output format */
> > -    mfxVideoParam       vpp_param;
> > -    mfxFrameInfo       *frame_infos;     /* frame info for each
> > input */
> > -
> > -    /* members related to the input/output surface */
> > -    int                 in_mem_mode;
> > -    int                 out_mem_mode;
> > -    QSVFrame           *in_frame_list;
> > -    QSVFrame           *out_frame_list;
> > -    int                 nb_surface_ptrs_in;
> > -    int                 nb_surface_ptrs_out;
> > -    mfxFrameSurface1  **surface_ptrs_in;
> > -    mfxFrameSurface1  **surface_ptrs_out;
> > -
> > -    /* MFXVPP extern parameters */
> > -    mfxExtOpaqueSurfaceAlloc opaque_alloc;
> > -    mfxExtBuffer      **ext_buffers;
> > -    int                 nb_ext_buffers;
> > -};
> > -
> >  static const mfxHandleType handle_types[] = {
> >      MFX_HANDLE_VA_DISPLAY,
> >      MFX_HANDLE_D3D9_DEVICE_MANAGER,
> > @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo
> > *frameinfo,
> > AVFilterLink *link)
> >  static void clear_unused_frames(QSVFrame *list)
> >  {
> >      while (list) {
> > -        if (list->surface && !list->surface->Data.Locked) {
> > -            list->surface = NULL;
> > +        if (list->used && !list->queued && !list-
> > >surface.Data.Locked) {
> >              av_frame_free(&list->frame);
> > +            list->used = 0;
> >          }
> >          list = list->next;
> >      }
> > @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame
> > **list)
> >      QSVFrame *out = *list;
> >  
> >      for (; out; out = out->next) {
> > -        if (!out->surface)
> > +        if (!out->used) {
> > +            out->used = 1;
> >              break;
> > +        }
> >      }
> >  
> >      if (!out) {
> > @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame
> > **list)
> >              av_log(NULL, AV_LOG_ERROR, "Can't alloc new output
> > frame.\n");
> >              return NULL;
> >          }
> > +        out->used  = 1;
> >          out->next  = *list;
> >          *list      = out;
> >      }
> > @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s,
> > AVFilterLink *inlink, AVFrame *p
> >              return NULL;
> >          }
> >          qsv_frame->frame   = av_frame_clone(picref);
> > -        qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame-
> > >data[3];
> > +        qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame-
> > >frame->data[3];
> >      } else {
> >          /* make a copy if the input is not padded as libmfx
> > requires */
> >          if (picref->height & 31 || picref->linesize[0] & 31) {
> > @@ -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext
> > *s,
> > AVFilterLink *inlink, AVFrame *p
> >              qsv_frame->frame = av_frame_clone(picref);
> >  
> >          if (map_frame_to_surface(qsv_frame->frame,
> > -                                &qsv_frame->surface_internal) < 0)
> > {
> > +                                 &qsv_frame->surface) < 0) {
> >              av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
> >              return NULL;
> >          }
> > -        qsv_frame->surface = &qsv_frame->surface_internal;
> >      }
> >  
> > -    qsv_frame->surface->Info           = s-
> > > frame_infos[FF_INLINK_IDX(inlink)];
> > 
> > -    qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame-
> > >frame->pts,
> > +    qsv_frame->surface.Info           = s-
> > > frame_infos[FF_INLINK_IDX(inlink)];
> > 
> > +    qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame-
> > >frame->pts,
> >                                                        inlink-
> > >time_base,
> > default_tb);
> >  
> > -    qsv_frame->surface->Info.PicStruct =
> > +    qsv_frame->surface.Info.PicStruct =
> >              !qsv_frame->frame->interlaced_frame ?
> > MFX_PICSTRUCT_PROGRESSIVE :
> >              (qsv_frame->frame->top_field_first ?
> > MFX_PICSTRUCT_FIELD_TFF :
> >                                                   MFX_PICSTRUCT_FIE
> > LD_BFF);
> >      if (qsv_frame->frame->repeat_pict == 1)
> > -        qsv_frame->surface->Info.PicStruct |=
> > MFX_PICSTRUCT_FIELD_REPEATED;
> > +        qsv_frame->surface.Info.PicStruct |=
> > MFX_PICSTRUCT_FIELD_REPEATED;
> >      else if (qsv_frame->frame->repeat_pict == 2)
> > -        qsv_frame->surface->Info.PicStruct |=
> > MFX_PICSTRUCT_FRAME_DOUBLING;
> > +        qsv_frame->surface.Info.PicStruct |=
> > MFX_PICSTRUCT_FRAME_DOUBLING;
> >      else if (qsv_frame->frame->repeat_pict == 4)
> > -        qsv_frame->surface->Info.PicStruct |=
> > MFX_PICSTRUCT_FRAME_TRIPLING;
> > +        qsv_frame->surface.Info.PicStruct |=
> > MFX_PICSTRUCT_FRAME_TRIPLING;
> >  
> >      return qsv_frame;
> >  }
> > @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s,
> > AVFilterLink *outlink)
> >              return NULL;
> >          }
> >  
> > -        out_frame->surface = (mfxFrameSurface1 *)out_frame->frame-
> > >data[3];
> > +        out_frame->surface = *(mfxFrameSurface1 *)out_frame-
> > >frame->data[3];
> >      } else {
> >          /* Get a frame with aligned dimensions.
> >           * Libmfx need system memory being 128x64 aligned */
> > @@ -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext
> > *s,
> > AVFilterLink *outlink)
> >          out_frame->frame->height = outlink->h;
> >  
> >          ret = map_frame_to_surface(out_frame->frame,
> > -                                  &out_frame->surface_internal);
> > +                                   &out_frame->surface);
> >          if (ret < 0)
> >              return NULL;
> > -
> > -        out_frame->surface = &out_frame->surface_internal;
> >      }
> >  
> > -    out_frame->surface->Info = s->vpp_param.vpp.Out;
> > +    out_frame->surface.Info = s->vpp_param.vpp.Out;
> >  
> >      return out_frame;
> >  }
> > @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext
> > *avctx,
> > QSVVPPContext *s)
> >      return 0;
> >  }
> >  
> > +static unsigned int qsv_fifo_item_size(void)
> > +{
> > +    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
> > +}
> > +
> > +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
> > +{
> > +    return  av_fifo_size(fifo)/qsv_fifo_item_size();
> > +}
> > +
> >  int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp,
> > QSVVPPParam
> > *param)
> >  {
> >      int i;
> > @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx,
> > QSVVPPContext **vpp, QSVVPPParam *p
> >          s->vpp_param.ExtParam    = param->ext_buf;
> >      }
> >  
> > -    s->vpp_param.AsyncDepth = 1;
> > +    s->got_frame = 0;
> > +
> > +    /** keep fifo size at least 1. Even when async_depth is 0,
> > fifo is used.
> > */
> > +    s->async_fifo  = av_fifo_alloc((param->async_depth + 1) *
> > qsv_fifo_item_size());
> > +    s->async_depth = param->async_depth;
> > +    if (!s->async_fifo) {
> > +        ret = AVERROR(ENOMEM);
> > +        goto failed;
> > +    }
> > +
> > +    s->vpp_param.AsyncDepth = param->async_depth;
> >  
> >      if (IS_SYSTEM_MEMORY(s->in_mem_mode))
> >          s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
> > @@ -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp)
> >      av_freep(&s->surface_ptrs_out);
> >      av_freep(&s->ext_buffers);
> >      av_freep(&s->frame_infos);
> > +    av_fifo_free(s->async_fifo);
> >      av_freep(vpp);
> >  
> >      return 0;
> > @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s,
> > AVFilterLink
> > *inlink, AVFrame *picr
> >      AVFilterContext  *ctx     = inlink->dst;
> >      AVFilterLink     *outlink = ctx->outputs[0];
> >      mfxSyncPoint      sync;
> > -    QSVFrame         *in_frame, *out_frame;
> > +    QSVFrame         *in_frame, *out_frame, *tmp;
> >      int               ret, filter_ret;
> >  
> > +    while (s->eof && qsv_fifo_size(s->async_fifo)) {
> > +        av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp),
> > NULL);
> > +        av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync),
> > NULL);
> > +        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) <
> > 0)
> > +            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > +
> > +        filter_ret = s->filter_frame(outlink, tmp->frame);
> > +        if (filter_ret < 0) {
> > +            av_frame_free(&tmp->frame);
> > +            ret = filter_ret;
> > +            break;
> > +        }
> > +        tmp->queued = 0;
> > +        s->got_frame = 1;
> > +        tmp->frame = NULL;
> > +    };
> > +
> > +    if (!picref)
> > +        return 0;
> > +
> >      in_frame = submit_frame(s, inlink, picref);
> >      if (!in_frame) {
> >          av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on
> > input[%d]\n",
> > @@ -821,8 +832,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s,
> > AVFilterLink
> > *inlink, AVFrame *picr
> >          }
> >  
> >          do {
> > -            ret = MFXVideoVPP_RunFrameVPPAsync(s->session,
> > in_frame->surface,
> > -                                               out_frame->surface, 
> > NULL,
> > &sync);
> > +            ret = MFXVideoVPP_RunFrameVPPAsync(s->session,
> > &in_frame-
> > > surface,
> > 
> > +                                               &out_frame-
> > >surface, NULL,
> > &sync);
> >              if (ret == MFX_WRN_DEVICE_BUSY)
> >                  av_usleep(500);
> >          } while (ret == MFX_WRN_DEVICE_BUSY);
> > @@ -833,20 +844,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s,
> > AVFilterLink *inlink, AVFrame *picr
> >                  ret = AVERROR(EAGAIN);
> >              break;
> >          }
> > +        out_frame->frame->pts = av_rescale_q(out_frame-
> > > surface.Data.TimeStamp,
> > 
> > +                                             default_tb, outlink-
> > >time_base);
> >  
> > -        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) <
> > 0)
> > -            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > +        out_frame->queued = 1;
> > +        av_fifo_generic_write(s->async_fifo, &out_frame,
> > sizeof(out_frame),
> > NULL);
> > +        av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync),
> > NULL);
> >  
> > -        out_frame->frame->pts = av_rescale_q(out_frame->surface-
> > > Data.TimeStamp,
> > 
> > -                                             default_tb, outlink-
> > >time_base);
> >  
> > -        filter_ret = s->filter_frame(outlink, out_frame->frame);
> > -        if (filter_ret < 0) {
> > -            av_frame_free(&out_frame->frame);
> > -            ret = filter_ret;
> > -            break;
> > +        if (qsv_fifo_size(s->async_fifo) > s->async_depth) {
> > +            av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp),
> > NULL);
> > +            av_fifo_generic_read(s->async_fifo, &sync,
> > sizeof(sync), NULL);
> > +
> > +            if (MFXVideoCORE_SyncOperation(s->session, sync, 1000)
> > < 0)
> > +                av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > +
> > +            filter_ret = s->filter_frame(outlink, tmp->frame);
> > +            if (filter_ret < 0) {
> > +                av_frame_free(&tmp->frame);
> > +                ret = filter_ret;
> > +                break;
> > +            }
> > +
> > +            tmp->queued = 0;
> > +            s->got_frame = 1;
> > +            tmp->frame = NULL;
> >          }
> > -        out_frame->frame = NULL;
> >      } while(ret == MFX_ERR_MORE_SURFACE);
> >  
> >      return ret;
> > diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
> > index b4baeedf9e..48c8ffc2d2 100644
> > --- a/libavfilter/qsvvpp.h
> > +++ b/libavfilter/qsvvpp.h
> > @@ -27,6 +27,7 @@
> >  #include <mfx/mfxvideo.h>
> >  
> >  #include "avfilter.h"
> > +#include "libavutil/fifo.h"
> >  
> >  #define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst-
> > > input_pads))
> > 
> >  #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src-
> > > output_pads))
> > 
> > @@ -39,7 +40,44 @@
> >      ((MFX_VERSION.Major > (MAJOR)) ||                           \
> >      (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >=
> > (MINOR)))
> >  
> > -typedef struct QSVVPPContext QSVVPPContext;
> > +#define VPP_ASYNC_DEPTH_DEFAULT 1
> > +
> > +typedef struct QSVFrame {
> > +    AVFrame          *frame;
> > +    mfxFrameSurface1 surface;
> > +    struct QSVFrame  *next;
> > +    int queued;
> > +    int used;
> 
> May we use queue count as what wenbin did for qsv decoder in 
> http://ffmpeg.org/pipermail/ffmpeg-devel/2021-March/277633.html ? If
> so, I think
> we may not use the member of 'used' and the logic will be simple. 
> 
> Thanks
> Haihao 

It's a better choice. I will try to see if it is possible to combine them
into one. Thanks for reviewing.

Fei
> 
> 
> > +} QSVFrame;
> > +
> > +typedef struct QSVVPPContext {
> > +    mfxSession          session;
> > +    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);
> > /**<
> > callback */
> > +    enum AVPixelFormat  out_sw_format;   /**< Real output format
> > */
> > +    mfxVideoParam       vpp_param;
> > +    mfxFrameInfo       *frame_infos;     /**< frame info for each
> > input */
> > +
> > +    /** members related to the input/output surface */
> > +    int                 in_mem_mode;
> > +    int                 out_mem_mode;
> > +    QSVFrame           *in_frame_list;
> > +    QSVFrame           *out_frame_list;
> > +    int                 nb_surface_ptrs_in;
> > +    int                 nb_surface_ptrs_out;
> > +    mfxFrameSurface1  **surface_ptrs_in;
> > +    mfxFrameSurface1  **surface_ptrs_out;
> > +
> > +    /** MFXVPP extern parameters */
> > +    mfxExtOpaqueSurfaceAlloc opaque_alloc;
> > +    mfxExtBuffer      **ext_buffers;
> > +    int                 nb_ext_buffers;
> > +
> > +    int got_frame;
> > +    int async_depth;
> > +    int eof;
> > +    /** order with frame_out, sync */
> > +    AVFifoBuffer *async_fifo;
> > +} QSVVPPContext;
> >  
> >  typedef struct QSVVPPCrop {
> >      int in_idx;        ///< Input index
> > @@ -60,6 +98,8 @@ typedef struct QSVVPPParam {
> >      /* Crop information for each input, if needed */
> >      int num_crop;
> >      QSVVPPCrop *crop;
> > +
> > +   int async_depth;
> >  } QSVVPPParam;
> >  
> >  /* create and initialize the QSV session */
> > diff --git a/libavfilter/vf_deinterlace_qsv.c
> > b/libavfilter/vf_deinterlace_qsv.c
> > index 89a282f99e..a620567de2 100644
> > --- a/libavfilter/vf_deinterlace_qsv.c
> > +++ b/libavfilter/vf_deinterlace_qsv.c
> > @@ -47,14 +47,6 @@ enum {
> >      QSVDEINT_MORE_INPUT,
> >  };
> >  
> > -typedef struct QSVFrame {
> > -    AVFrame *frame;
> > -    mfxFrameSurface1 surface;
> > -    int used;
> > -
> > -    struct QSVFrame *next;
> > -} QSVFrame;
> > -
> >  typedef struct QSVDeintContext {
> >      const AVClass *class;
> >  
> > diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
> > index 5d57707455..83bdf1276c 100644
> > --- a/libavfilter/vf_vpp_qsv.c
> > +++ b/libavfilter/vf_vpp_qsv.c
> > @@ -32,6 +32,7 @@
> >  #include "formats.h"
> >  #include "internal.h"
> >  #include "avfilter.h"
> > +#include "filters.h"
> >  #include "libavcodec/avcodec.h"
> >  #include "libavformat/avformat.h"
> >  
> > @@ -93,6 +94,9 @@ typedef struct VPPContext{
> >      char *cx, *cy, *cw, *ch;
> >      char *ow, *oh;
> >      char *output_format_str;
> > +
> > +    int async_depth;
> > +    int eof;
> >  } VPPContext;
> >  
> >  static const AVOption options[] = {
> > @@ -128,6 +132,7 @@ static const AVOption options[] = {
> >      { "h",      "Output video height", OFFSET(oh),
> > AV_OPT_TYPE_STRING, {
> > .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
> >      { "height", "Output video height", OFFSET(oh),
> > AV_OPT_TYPE_STRING, {
> > .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
> >      { "format", "Output pixel format", OFFSET(output_format_str),
> > AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
> > +    { "async_depth", "Internal parallelization depth, the higher
> > the value
> > the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, {
> > .i64 =
> > VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS },
> >  
> >      { NULL }
> >  };
> > @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink)
> >      param.filter_frame  = NULL;
> >      param.num_ext_buf   = 0;
> >      param.ext_buf       = ext_buf;
> > +    param.async_depth   = vpp->async_depth;
> >  
> >      if (inlink->format == AV_PIX_FMT_QSV) {
> >           if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx-
> > >data)
> > @@ -467,23 +473,64 @@ static int config_output(AVFilterLink
> > *outlink)
> >      return 0;
> >  }
> >  
> > -static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
> > +static int activate(AVFilterContext *ctx)
> >  {
> > -    int              ret = 0;
> > -    AVFilterContext  *ctx = inlink->dst;
> > -    VPPContext       *vpp = inlink->dst->priv;
> > -    AVFilterLink     *outlink = ctx->outputs[0];
> > -
> > -    if (vpp->qsv) {
> > -        ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
> > -        av_frame_free(&picref);
> > +    AVFilterLink *inlink = ctx->inputs[0];
> > +    AVFilterLink *outlink = ctx->outputs[0];
> > +    VPPContext *s =ctx->priv;
> > +    QSVVPPContext *qsv = s->qsv;
> > +    AVFrame *in = NULL;
> > +    int ret, status;
> > +    int64_t pts;
> > +
> > +    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
> > +
> > +    if (!s->eof) {
> > +        ret = ff_inlink_consume_frame(inlink, &in);
> > +        if (ret < 0)
> > +            return ret;
> > +
> > +        if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> > +            if (status == AVERROR_EOF) {
> > +                s->eof = 1;
> > +            }
> > +        }
> > +    }
> > +
> > +    if (qsv) {
> > +        if (in || s->eof) {
> > +            qsv->eof = s->eof;
> > +            ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
> > +            av_frame_free(&in);
> > +
> > +            if (s->eof) {
> > +                ff_outlink_set_status(outlink, status, pts);
> > +                return 0;
> > +            }
> > +
> > +            if (qsv->got_frame) {
> > +                qsv->got_frame = 0;
> > +                return ret;
> > +            }
> > +        }
> >      } else {
> > -        if (picref->pts != AV_NOPTS_VALUE)
> > -            picref->pts = av_rescale_q(picref->pts, inlink-
> > >time_base,
> > outlink->time_base);
> > -        ret = ff_filter_frame(outlink, picref);
> > +        if (in) {
> > +            if (in->pts != AV_NOPTS_VALUE)
> > +                in->pts = av_rescale_q(in->pts, inlink->time_base, 
> > outlink-
> > > time_base);
> > 
> > +
> > +            ret = ff_filter_frame(outlink, in);
> > +            return ret;
> > +        }
> >      }
> >  
> > -    return ret;
> > +    if (s->eof) {
> > +        ff_outlink_set_status(outlink, status, pts);
> > +        return 0;
> > +    } else {
> > +        FF_FILTER_FORWARD_WANTED(outlink, inlink);
> > +    }
> > +
> > +    return FFERROR_NOT_READY;
> >  }
> >  
> >  static int query_formats(AVFilterContext *ctx)
> > @@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = {
> >          .name          = "default",
> >          .type          = AVMEDIA_TYPE_VIDEO,
> >          .config_props  = config_input,
> > -        .filter_frame  = filter_frame,
> >      },
> >      { NULL }
> >  };
> > @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = {
> >      .uninit        = vpp_uninit,
> >      .inputs        = vpp_inputs,
> >      .outputs       = vpp_outputs,
> > +    .activate      = activate,
> >      .priv_class    = &vpp_class,
> >      .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> >  };
diff mbox series

Patch

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index f216b3f248..2e824e67e7 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -27,6 +27,7 @@ 
 #include "libavutil/hwcontext_qsv.h"
 #include "libavutil/time.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/fifo.h"
 
 #include "internal.h"
 #include "qsvvpp.h"
@@ -37,37 +38,6 @@ 
 #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
 #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
 
-typedef struct QSVFrame {
-    AVFrame          *frame;
-    mfxFrameSurface1 *surface;
-    mfxFrameSurface1  surface_internal;  /* for system memory */
-    struct QSVFrame  *next;
-} QSVFrame;
-
-/* abstract struct for all QSV filters */
-struct QSVVPPContext {
-    mfxSession          session;
-    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback */
-    enum AVPixelFormat  out_sw_format;   /* Real output format */
-    mfxVideoParam       vpp_param;
-    mfxFrameInfo       *frame_infos;     /* frame info for each input */
-
-    /* members related to the input/output surface */
-    int                 in_mem_mode;
-    int                 out_mem_mode;
-    QSVFrame           *in_frame_list;
-    QSVFrame           *out_frame_list;
-    int                 nb_surface_ptrs_in;
-    int                 nb_surface_ptrs_out;
-    mfxFrameSurface1  **surface_ptrs_in;
-    mfxFrameSurface1  **surface_ptrs_out;
-
-    /* MFXVPP extern parameters */
-    mfxExtOpaqueSurfaceAlloc opaque_alloc;
-    mfxExtBuffer      **ext_buffers;
-    int                 nb_ext_buffers;
-};
-
 static const mfxHandleType handle_types[] = {
     MFX_HANDLE_VA_DISPLAY,
     MFX_HANDLE_D3D9_DEVICE_MANAGER,
@@ -336,9 +306,9 @@  static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, AVFilterLink *link)
 static void clear_unused_frames(QSVFrame *list)
 {
     while (list) {
-        if (list->surface && !list->surface->Data.Locked) {
-            list->surface = NULL;
+        if (list->used && !list->queued && !list->surface.Data.Locked) {
             av_frame_free(&list->frame);
+            list->used = 0;
         }
         list = list->next;
     }
@@ -361,8 +331,10 @@  static QSVFrame *get_free_frame(QSVFrame **list)
     QSVFrame *out = *list;
 
     for (; out; out = out->next) {
-        if (!out->surface)
+        if (!out->used) {
+            out->used = 1;
             break;
+        }
     }
 
     if (!out) {
@@ -371,6 +343,7 @@  static QSVFrame *get_free_frame(QSVFrame **list)
             av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n");
             return NULL;
         }
+        out->used  = 1;
         out->next  = *list;
         *list      = out;
     }
@@ -402,7 +375,7 @@  static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
             return NULL;
         }
         qsv_frame->frame   = av_frame_clone(picref);
-        qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
+        qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3];
     } else {
         /* make a copy if the input is not padded as libmfx requires */
         if (picref->height & 31 || picref->linesize[0] & 31) {
@@ -425,27 +398,26 @@  static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
             qsv_frame->frame = av_frame_clone(picref);
 
         if (map_frame_to_surface(qsv_frame->frame,
-                                &qsv_frame->surface_internal) < 0) {
+                                 &qsv_frame->surface) < 0) {
             av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
             return NULL;
         }
-        qsv_frame->surface = &qsv_frame->surface_internal;
     }
 
-    qsv_frame->surface->Info           = s->frame_infos[FF_INLINK_IDX(inlink)];
-    qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
+    qsv_frame->surface.Info           = s->frame_infos[FF_INLINK_IDX(inlink)];
+    qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
                                                       inlink->time_base, default_tb);
 
-    qsv_frame->surface->Info.PicStruct =
+    qsv_frame->surface.Info.PicStruct =
             !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
             (qsv_frame->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
                                                  MFX_PICSTRUCT_FIELD_BFF);
     if (qsv_frame->frame->repeat_pict == 1)
-        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
+        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
     else if (qsv_frame->frame->repeat_pict == 2)
-        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
+        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
     else if (qsv_frame->frame->repeat_pict == 4)
-        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
+        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
 
     return qsv_frame;
 }
@@ -476,7 +448,7 @@  static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
             return NULL;
         }
 
-        out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3];
+        out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3];
     } else {
         /* Get a frame with aligned dimensions.
          * Libmfx need system memory being 128x64 aligned */
@@ -490,14 +462,12 @@  static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
         out_frame->frame->height = outlink->h;
 
         ret = map_frame_to_surface(out_frame->frame,
-                                  &out_frame->surface_internal);
+                                   &out_frame->surface);
         if (ret < 0)
             return NULL;
-
-        out_frame->surface = &out_frame->surface_internal;
     }
 
-    out_frame->surface->Info = s->vpp_param.vpp.Out;
+    out_frame->surface.Info = s->vpp_param.vpp.Out;
 
     return out_frame;
 }
@@ -666,6 +636,16 @@  static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
     return 0;
 }
 
+static unsigned int qsv_fifo_item_size(void)
+{
+    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
+}
+
+static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
+{
+    return  av_fifo_size(fifo)/qsv_fifo_item_size();
+}
+
 int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param)
 {
     int i;
@@ -738,7 +718,17 @@  int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *p
         s->vpp_param.ExtParam    = param->ext_buf;
     }
 
-    s->vpp_param.AsyncDepth = 1;
+    s->got_frame = 0;
+
+    /** keep fifo size at least 1. Even when async_depth is 0, fifo is used. */
+    s->async_fifo  = av_fifo_alloc((param->async_depth + 1) * qsv_fifo_item_size());
+    s->async_depth = param->async_depth;
+    if (!s->async_fifo) {
+        ret = AVERROR(ENOMEM);
+        goto failed;
+    }
+
+    s->vpp_param.AsyncDepth = param->async_depth;
 
     if (IS_SYSTEM_MEMORY(s->in_mem_mode))
         s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
@@ -793,6 +783,7 @@  int ff_qsvvpp_free(QSVVPPContext **vpp)
     av_freep(&s->surface_ptrs_out);
     av_freep(&s->ext_buffers);
     av_freep(&s->frame_infos);
+    av_fifo_free(s->async_fifo);
     av_freep(vpp);
 
     return 0;
@@ -803,9 +794,29 @@  int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
     AVFilterContext  *ctx     = inlink->dst;
     AVFilterLink     *outlink = ctx->outputs[0];
     mfxSyncPoint      sync;
-    QSVFrame         *in_frame, *out_frame;
+    QSVFrame         *in_frame, *out_frame, *tmp;
     int               ret, filter_ret;
 
+    while (s->eof && qsv_fifo_size(s->async_fifo)) {
+        av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
+        av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
+        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
+            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+
+        filter_ret = s->filter_frame(outlink, tmp->frame);
+        if (filter_ret < 0) {
+            av_frame_free(&tmp->frame);
+            ret = filter_ret;
+            break;
+        }
+        tmp->queued = 0;
+        s->got_frame = 1;
+        tmp->frame = NULL;
+    };
+
+    if (!picref)
+        return 0;
+
     in_frame = submit_frame(s, inlink, picref);
     if (!in_frame) {
         av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input[%d]\n",
@@ -821,8 +832,8 @@  int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
         }
 
         do {
-            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface,
-                                               out_frame->surface, NULL, &sync);
+            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame->surface,
+                                               &out_frame->surface, NULL, &sync);
             if (ret == MFX_WRN_DEVICE_BUSY)
                 av_usleep(500);
         } while (ret == MFX_WRN_DEVICE_BUSY);
@@ -833,20 +844,32 @@  int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
                 ret = AVERROR(EAGAIN);
             break;
         }
+        out_frame->frame->pts = av_rescale_q(out_frame->surface.Data.TimeStamp,
+                                             default_tb, outlink->time_base);
 
-        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
-            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+        out_frame->queued = 1;
+        av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame), NULL);
+        av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL);
 
-        out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp,
-                                             default_tb, outlink->time_base);
 
-        filter_ret = s->filter_frame(outlink, out_frame->frame);
-        if (filter_ret < 0) {
-            av_frame_free(&out_frame->frame);
-            ret = filter_ret;
-            break;
+        if (qsv_fifo_size(s->async_fifo) > s->async_depth) {
+            av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
+            av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
+
+            if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
+                av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+
+            filter_ret = s->filter_frame(outlink, tmp->frame);
+            if (filter_ret < 0) {
+                av_frame_free(&tmp->frame);
+                ret = filter_ret;
+                break;
+            }
+
+            tmp->queued = 0;
+            s->got_frame = 1;
+            tmp->frame = NULL;
         }
-        out_frame->frame = NULL;
     } while(ret == MFX_ERR_MORE_SURFACE);
 
     return ret;
diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
index b4baeedf9e..48c8ffc2d2 100644
--- a/libavfilter/qsvvpp.h
+++ b/libavfilter/qsvvpp.h
@@ -27,6 +27,7 @@ 
 #include <mfx/mfxvideo.h>
 
 #include "avfilter.h"
+#include "libavutil/fifo.h"
 
 #define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst->input_pads))
 #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads))
@@ -39,7 +40,44 @@ 
     ((MFX_VERSION.Major > (MAJOR)) ||                           \
     (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR)))
 
-typedef struct QSVVPPContext QSVVPPContext;
+#define VPP_ASYNC_DEPTH_DEFAULT 1
+
+typedef struct QSVFrame {
+    AVFrame          *frame;
+    mfxFrameSurface1 surface;
+    struct QSVFrame  *next;
+    int queued;
+    int used;
+} QSVFrame;
+
+typedef struct QSVVPPContext {
+    mfxSession          session;
+    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**< callback */
+    enum AVPixelFormat  out_sw_format;   /**< Real output format */
+    mfxVideoParam       vpp_param;
+    mfxFrameInfo       *frame_infos;     /**< frame info for each input */
+
+    /** members related to the input/output surface */
+    int                 in_mem_mode;
+    int                 out_mem_mode;
+    QSVFrame           *in_frame_list;
+    QSVFrame           *out_frame_list;
+    int                 nb_surface_ptrs_in;
+    int                 nb_surface_ptrs_out;
+    mfxFrameSurface1  **surface_ptrs_in;
+    mfxFrameSurface1  **surface_ptrs_out;
+
+    /** MFXVPP extern parameters */
+    mfxExtOpaqueSurfaceAlloc opaque_alloc;
+    mfxExtBuffer      **ext_buffers;
+    int                 nb_ext_buffers;
+
+    int got_frame;
+    int async_depth;
+    int eof;
+    /** order with frame_out, sync */
+    AVFifoBuffer *async_fifo;
+} QSVVPPContext;
 
 typedef struct QSVVPPCrop {
     int in_idx;        ///< Input index
@@ -60,6 +98,8 @@  typedef struct QSVVPPParam {
     /* Crop information for each input, if needed */
     int num_crop;
     QSVVPPCrop *crop;
+
+    int async_depth;
 } QSVVPPParam;
 
 /* create and initialize the QSV session */
diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c
index 89a282f99e..a620567de2 100644
--- a/libavfilter/vf_deinterlace_qsv.c
+++ b/libavfilter/vf_deinterlace_qsv.c
@@ -47,14 +47,6 @@  enum {
     QSVDEINT_MORE_INPUT,
 };
 
-typedef struct QSVFrame {
-    AVFrame *frame;
-    mfxFrameSurface1 surface;
-    int used;
-
-    struct QSVFrame *next;
-} QSVFrame;
-
 typedef struct QSVDeintContext {
     const AVClass *class;
 
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 5d57707455..83bdf1276c 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -32,6 +32,7 @@ 
 #include "formats.h"
 #include "internal.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "libavcodec/avcodec.h"
 #include "libavformat/avformat.h"
 
@@ -93,6 +94,9 @@  typedef struct VPPContext{
     char *cx, *cy, *cw, *ch;
     char *ow, *oh;
     char *output_format_str;
+
+    int async_depth;
+    int eof;
 } VPPContext;
 
 static const AVOption options[] = {
@@ -128,6 +132,7 @@  static const AVOption options[] = {
     { "h",      "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
     { "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
     { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS },
 
     { NULL }
 };
@@ -303,6 +308,7 @@  static int config_output(AVFilterLink *outlink)
     param.filter_frame  = NULL;
     param.num_ext_buf   = 0;
     param.ext_buf       = ext_buf;
+    param.async_depth   = vpp->async_depth;
 
     if (inlink->format == AV_PIX_FMT_QSV) {
          if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data)
@@ -467,23 +473,64 @@  static int config_output(AVFilterLink *outlink)
     return 0;
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
+static int activate(AVFilterContext *ctx)
 {
-    int              ret = 0;
-    AVFilterContext  *ctx = inlink->dst;
-    VPPContext       *vpp = inlink->dst->priv;
-    AVFilterLink     *outlink = ctx->outputs[0];
-
-    if (vpp->qsv) {
-        ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
-        av_frame_free(&picref);
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    VPPContext *s = ctx->priv;
+    QSVVPPContext *qsv = s->qsv;
+    AVFrame *in = NULL;
+    int ret, status = AVERROR_EOF;
+    int64_t pts = AV_NOPTS_VALUE;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (!s->eof) {
+        ret = ff_inlink_consume_frame(inlink, &in);
+        if (ret < 0)
+            return ret;
+
+        if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+            if (status == AVERROR_EOF) {
+                s->eof = 1;
+            }
+        }
+    }
+
+    if (qsv) {
+        if (in || s->eof) {
+            qsv->eof = s->eof;
+            ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
+            av_frame_free(&in);
+
+            if (s->eof) {
+                ff_outlink_set_status(outlink, status, pts);
+                return 0;
+            }
+
+            if (qsv->got_frame) {
+                qsv->got_frame = 0;
+                return ret;
+            }
+        }
     } else {
-        if (picref->pts != AV_NOPTS_VALUE)
-            picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base);
-        ret = ff_filter_frame(outlink, picref);
+        if (in) {
+            if (in->pts != AV_NOPTS_VALUE)
+                in->pts = av_rescale_q(in->pts, inlink->time_base, outlink->time_base);
+
+            ret = ff_filter_frame(outlink, in);
+            return ret;
+        }
     }
 
-    return ret;
+    if (s->eof) {
+        ff_outlink_set_status(outlink, status, pts);
+        return 0;
+    } else {
+        FF_FILTER_FORWARD_WANTED(outlink, inlink);
+    }
+
+    return FFERROR_NOT_READY;
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -531,7 +578,6 @@  static const AVFilterPad vpp_inputs[] = {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
         .config_props  = config_input,
-        .filter_frame  = filter_frame,
     },
     { NULL }
 };
@@ -554,6 +600,7 @@  AVFilter ff_vf_vpp_qsv = {
     .uninit        = vpp_uninit,
     .inputs        = vpp_inputs,
     .outputs       = vpp_outputs,
+    .activate      = activate,
     .priv_class    = &vpp_class,
     .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
 };