Message ID | 20210127014213.13461-1-fei.w.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,v1] lavfi/qsvvpp: support async depth | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote: > Async depth will allow qsv filter cache few frames, and avoid force > switch and end filter task frame by frame. This change will improve > performance for some multi-task case, for example 1:N transcode( > decode + vpp + encode) with all QSV plugins. > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > --- > libavfilter/qsvvpp.c | 147 ++++++++++++++++++----------- > -- > libavfilter/qsvvpp.h | 42 ++++++++- > libavfilter/vf_deinterlace_qsv.c | 8 -- > libavfilter/vf_vpp_qsv.c | 75 +++++++++++++--- > 4 files changed, 187 insertions(+), 85 deletions(-) > > diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c > index f216b3f248..2e824e67e7 100644 > --- a/libavfilter/qsvvpp.c > +++ b/libavfilter/qsvvpp.c > @@ -27,6 +27,7 @@ > #include "libavutil/hwcontext_qsv.h" > #include "libavutil/time.h" > #include "libavutil/pixdesc.h" > +#include "libavutil/fifo.h" > > #include "internal.h" > #include "qsvvpp.h" > @@ -37,37 +38,6 @@ > #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME) > #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY) > > -typedef struct QSVFrame { > - AVFrame *frame; > - mfxFrameSurface1 *surface; > - mfxFrameSurface1 surface_internal; /* for system memory */ > - struct QSVFrame *next; > -} QSVFrame; > - > -/* abstract struct for all QSV filters */ > -struct QSVVPPContext { > - mfxSession session; > - int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* > callback */ > - enum AVPixelFormat out_sw_format; /* Real output format */ > - mfxVideoParam vpp_param; > - mfxFrameInfo *frame_infos; /* frame info for each > input */ > - > - /* members related to the input/output surface */ > - int in_mem_mode; > - int out_mem_mode; > - QSVFrame *in_frame_list; > - QSVFrame *out_frame_list; > - int nb_surface_ptrs_in; > - int nb_surface_ptrs_out; > - mfxFrameSurface1 **surface_ptrs_in; > - mfxFrameSurface1 **surface_ptrs_out; > - > - /* MFXVPP extern parameters */ > - 
mfxExtOpaqueSurfaceAlloc opaque_alloc; > - mfxExtBuffer **ext_buffers; > - int nb_ext_buffers; > -}; > - > static const mfxHandleType handle_types[] = { > MFX_HANDLE_VA_DISPLAY, > MFX_HANDLE_D3D9_DEVICE_MANAGER, > @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo > *frameinfo, AVFilterLink *link) > static void clear_unused_frames(QSVFrame *list) > { > while (list) { > - if (list->surface && !list->surface->Data.Locked) { > - list->surface = NULL; > + if (list->used && !list->queued && !list- > >surface.Data.Locked) { > av_frame_free(&list->frame); > + list->used = 0; > } > list = list->next; > } > @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame **list) > QSVFrame *out = *list; > > for (; out; out = out->next) { > - if (!out->surface) > + if (!out->used) { > + out->used = 1; > break; > + } > } > > if (!out) { > @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame **list) > av_log(NULL, AV_LOG_ERROR, "Can't alloc new output > frame.\n"); > return NULL; > } > + out->used = 1; > out->next = *list; > *list = out; > } > @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, > AVFilterLink *inlink, AVFrame *p > return NULL; > } > qsv_frame->frame = av_frame_clone(picref); > - qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame- > >data[3]; > + qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame- > >data[3]; > } else { > /* make a copy if the input is not padded as libmfx requires > */ > if (picref->height & 31 || picref->linesize[0] & 31) { > @@ -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s, > AVFilterLink *inlink, AVFrame *p > qsv_frame->frame = av_frame_clone(picref); > > if (map_frame_to_surface(qsv_frame->frame, > - &qsv_frame->surface_internal) < 0) { > + &qsv_frame->surface) < 0) { > av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n"); > return NULL; > } > - qsv_frame->surface = &qsv_frame->surface_internal; > } > > - qsv_frame->surface->Info = s- > 
>frame_infos[FF_INLINK_IDX(inlink)]; > - qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame- > >frame->pts, > + qsv_frame->surface.Info = s- > >frame_infos[FF_INLINK_IDX(inlink)]; > + qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame- > >frame->pts, > inlink- > >time_base, default_tb); > > - qsv_frame->surface->Info.PicStruct = > + qsv_frame->surface.Info.PicStruct = > !qsv_frame->frame->interlaced_frame ? > MFX_PICSTRUCT_PROGRESSIVE : > (qsv_frame->frame->top_field_first ? > MFX_PICSTRUCT_FIELD_TFF : > MFX_PICSTRUCT_FIELD > _BFF); > if (qsv_frame->frame->repeat_pict == 1) > - qsv_frame->surface->Info.PicStruct |= > MFX_PICSTRUCT_FIELD_REPEATED; > + qsv_frame->surface.Info.PicStruct |= > MFX_PICSTRUCT_FIELD_REPEATED; > else if (qsv_frame->frame->repeat_pict == 2) > - qsv_frame->surface->Info.PicStruct |= > MFX_PICSTRUCT_FRAME_DOUBLING; > + qsv_frame->surface.Info.PicStruct |= > MFX_PICSTRUCT_FRAME_DOUBLING; > else if (qsv_frame->frame->repeat_pict == 4) > - qsv_frame->surface->Info.PicStruct |= > MFX_PICSTRUCT_FRAME_TRIPLING; > + qsv_frame->surface.Info.PicStruct |= > MFX_PICSTRUCT_FRAME_TRIPLING; > > return qsv_frame; > } > @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s, > AVFilterLink *outlink) > return NULL; > } > > - out_frame->surface = (mfxFrameSurface1 *)out_frame->frame- > >data[3]; > + out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame- > >data[3]; > } else { > /* Get a frame with aligned dimensions. 
> * Libmfx need system memory being 128x64 aligned */ > @@ -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext *s, > AVFilterLink *outlink) > out_frame->frame->height = outlink->h; > > ret = map_frame_to_surface(out_frame->frame, > - &out_frame->surface_internal); > + &out_frame->surface); > if (ret < 0) > return NULL; > - > - out_frame->surface = &out_frame->surface_internal; > } > > - out_frame->surface->Info = s->vpp_param.vpp.Out; > + out_frame->surface.Info = s->vpp_param.vpp.Out; > > return out_frame; > } > @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext > *avctx, QSVVPPContext *s) > return 0; > } > > +static unsigned int qsv_fifo_item_size(void) > +{ > + return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*); > +} > + > +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo) > +{ > + return av_fifo_size(fifo)/qsv_fifo_item_size(); > +} > + > int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, > QSVVPPParam *param) > { > int i; > @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx, > QSVVPPContext **vpp, QSVVPPParam *p > s->vpp_param.ExtParam = param->ext_buf; > } > > - s->vpp_param.AsyncDepth = 1; > + s->got_frame = 0; > + > + /** keep fifo size at least 1. Even when async_depth is 0, fifo > is used. 
*/ > + s->async_fifo = av_fifo_alloc((param->async_depth + 1) * > qsv_fifo_item_size()); > + s->async_depth = param->async_depth; > + if (!s->async_fifo) { > + ret = AVERROR(ENOMEM); > + goto failed; > + } > + > + s->vpp_param.AsyncDepth = param->async_depth; > > if (IS_SYSTEM_MEMORY(s->in_mem_mode)) > s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY; > @@ -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp) > av_freep(&s->surface_ptrs_out); > av_freep(&s->ext_buffers); > av_freep(&s->frame_infos); > + av_fifo_free(s->async_fifo); > av_freep(vpp); > > return 0; > @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, > AVFilterLink *inlink, AVFrame *picr > AVFilterContext *ctx = inlink->dst; > AVFilterLink *outlink = ctx->outputs[0]; > mfxSyncPoint sync; > - QSVFrame *in_frame, *out_frame; > + QSVFrame *in_frame, *out_frame, *tmp; > int ret, filter_ret; > > + while (s->eof && qsv_fifo_size(s->async_fifo)) { > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), > NULL); > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), > NULL); > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > + > + filter_ret = s->filter_frame(outlink, tmp->frame); > + if (filter_ret < 0) { > + av_frame_free(&tmp->frame); > + ret = filter_ret; > + break; > + } > + tmp->queued = 0; > + s->got_frame = 1; > + tmp->frame = NULL; > + }; > + > + if (!picref) > + return 0; > + > in_frame = submit_frame(s, inlink, picref); > if (!in_frame) { > av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on > input[%d]\n", > @@ -821,8 +832,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, > AVFilterLink *inlink, AVFrame *picr > } > > do { > - ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame- > >surface, > - out_frame->surface, > NULL, &sync); > + ret = MFXVideoVPP_RunFrameVPPAsync(s->session, > &in_frame->surface, > + &out_frame->surface, > NULL, &sync); > if (ret == MFX_WRN_DEVICE_BUSY) > av_usleep(500); > 
} while (ret == MFX_WRN_DEVICE_BUSY); > @@ -833,20 +844,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, > AVFilterLink *inlink, AVFrame *picr > ret = AVERROR(EAGAIN); > break; > } > + out_frame->frame->pts = av_rescale_q(out_frame- > >surface.Data.TimeStamp, > + default_tb, outlink- > >time_base); > > - if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) > - av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > + out_frame->queued = 1; > + av_fifo_generic_write(s->async_fifo, &out_frame, > sizeof(out_frame), NULL); > + av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), > NULL); > > - out_frame->frame->pts = av_rescale_q(out_frame->surface- > >Data.TimeStamp, > - default_tb, outlink- > >time_base); > > - filter_ret = s->filter_frame(outlink, out_frame->frame); > - if (filter_ret < 0) { > - av_frame_free(&out_frame->frame); > - ret = filter_ret; > - break; > + if (qsv_fifo_size(s->async_fifo) > s->async_depth) { > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), > NULL); > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), > NULL); > + > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < > 0) > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > + > + filter_ret = s->filter_frame(outlink, tmp->frame); > + if (filter_ret < 0) { > + av_frame_free(&tmp->frame); > + ret = filter_ret; > + break; > + } > + > + tmp->queued = 0; > + s->got_frame = 1; > + tmp->frame = NULL; > } > - out_frame->frame = NULL; > } while(ret == MFX_ERR_MORE_SURFACE); > > return ret; > diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h > index b4baeedf9e..48c8ffc2d2 100644 > --- a/libavfilter/qsvvpp.h > +++ b/libavfilter/qsvvpp.h > @@ -27,6 +27,7 @@ > #include <mfx/mfxvideo.h> > > #include "avfilter.h" > +#include "libavutil/fifo.h" > > #define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst- > >input_pads)) > #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src- > >output_pads)) > @@ -39,7 +40,44 @@ > ((MFX_VERSION.Major > (MAJOR)) || \ 
> (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR))) > > -typedef struct QSVVPPContext QSVVPPContext; > +#define VPP_ASYNC_DEPTH_DEFAULT 1 > + > +typedef struct QSVFrame { > + AVFrame *frame; > + mfxFrameSurface1 surface; > + struct QSVFrame *next; > + int queued; > + int used; > +} QSVFrame; > + > +typedef struct QSVVPPContext { > + mfxSession session; > + int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); > /**< callback */ > + enum AVPixelFormat out_sw_format; /**< Real output format */ > + mfxVideoParam vpp_param; > + mfxFrameInfo *frame_infos; /**< frame info for each > input */ > + > + /** members related to the input/output surface */ > + int in_mem_mode; > + int out_mem_mode; > + QSVFrame *in_frame_list; > + QSVFrame *out_frame_list; > + int nb_surface_ptrs_in; > + int nb_surface_ptrs_out; > + mfxFrameSurface1 **surface_ptrs_in; > + mfxFrameSurface1 **surface_ptrs_out; > + > + /** MFXVPP extern parameters */ > + mfxExtOpaqueSurfaceAlloc opaque_alloc; > + mfxExtBuffer **ext_buffers; > + int nb_ext_buffers; > + > + int got_frame; > + int async_depth; > + int eof; > + /** order with frame_out, sync */ > + AVFifoBuffer *async_fifo; > +} QSVVPPContext; > > typedef struct QSVVPPCrop { > int in_idx; ///< Input index > @@ -60,6 +98,8 @@ typedef struct QSVVPPParam { > /* Crop information for each input, if needed */ > int num_crop; > QSVVPPCrop *crop; > + > + int async_depth; > } QSVVPPParam; > > /* create and initialize the QSV session */ > diff --git a/libavfilter/vf_deinterlace_qsv.c > b/libavfilter/vf_deinterlace_qsv.c > index 89a282f99e..a620567de2 100644 > --- a/libavfilter/vf_deinterlace_qsv.c > +++ b/libavfilter/vf_deinterlace_qsv.c > @@ -47,14 +47,6 @@ enum { > QSVDEINT_MORE_INPUT, > }; > > -typedef struct QSVFrame { > - AVFrame *frame; > - mfxFrameSurface1 surface; > - int used; > - > - struct QSVFrame *next; > -} QSVFrame; > - > typedef struct QSVDeintContext { > const AVClass *class; > > diff --git a/libavfilter/vf_vpp_qsv.c 
b/libavfilter/vf_vpp_qsv.c > index 5d57707455..83bdf1276c 100644 > --- a/libavfilter/vf_vpp_qsv.c > +++ b/libavfilter/vf_vpp_qsv.c > @@ -32,6 +32,7 @@ > #include "formats.h" > #include "internal.h" > #include "avfilter.h" > +#include "filters.h" > #include "libavcodec/avcodec.h" > #include "libavformat/avformat.h" > > @@ -93,6 +94,9 @@ typedef struct VPPContext{ > char *cx, *cy, *cw, *ch; > char *ow, *oh; > char *output_format_str; > + > + int async_depth; > + int eof; > } VPPContext; > > static const AVOption options[] = { > @@ -128,6 +132,7 @@ static const AVOption options[] = { > { "h", "Output video height", OFFSET(oh), > AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, > { "height", "Output video height", OFFSET(oh), > AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, > { "format", "Output pixel format", OFFSET(output_format_str), > AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, > + { "async_depth", "Internal parallelization depth, the higher the > value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, > { .i64 = VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS }, > > { NULL } > }; > @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink) > param.filter_frame = NULL; > param.num_ext_buf = 0; > param.ext_buf = ext_buf; > + param.async_depth = vpp->async_depth; > > if (inlink->format == AV_PIX_FMT_QSV) { > if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data) > @@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink) > return 0; > } > > -static int filter_frame(AVFilterLink *inlink, AVFrame *picref) > +static int activate(AVFilterContext *ctx) > { > - int ret = 0; > - AVFilterContext *ctx = inlink->dst; > - VPPContext *vpp = inlink->dst->priv; > - AVFilterLink *outlink = ctx->outputs[0]; > - > - if (vpp->qsv) { > - ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); > - av_frame_free(&picref); > + AVFilterLink *inlink = ctx->inputs[0]; > + AVFilterLink *outlink = 
ctx->outputs[0]; > + VPPContext *s =ctx->priv; > + QSVVPPContext *qsv = s->qsv; > + AVFrame *in = NULL; > + int ret, status; > + int64_t pts; > + > + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); > + > + if (!s->eof) { > + ret = ff_inlink_consume_frame(inlink, &in); > + if (ret < 0) > + return ret; > + > + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { > + if (status == AVERROR_EOF) { > + s->eof = 1; > + } > + } > + } > + > + if (qsv) { > + if (in || s->eof) { > + qsv->eof = s->eof; > + ret = ff_qsvvpp_filter_frame(qsv, inlink, in); > + av_frame_free(&in); > + > + if (s->eof) { > + ff_outlink_set_status(outlink, status, pts); > + return 0; > + } > + > + if (qsv->got_frame) { > + qsv->got_frame = 0; > + return ret; > + } > + } > } else { > - if (picref->pts != AV_NOPTS_VALUE) > - picref->pts = av_rescale_q(picref->pts, inlink- > >time_base, outlink->time_base); > - ret = ff_filter_frame(outlink, picref); > + if (in) { > + if (in->pts != AV_NOPTS_VALUE) > + in->pts = av_rescale_q(in->pts, inlink->time_base, > outlink->time_base); > + > + ret = ff_filter_frame(outlink, in); > + return ret; > + } > } > > - return ret; > + if (s->eof) { > + ff_outlink_set_status(outlink, status, pts); > + return 0; > + } else { > + FF_FILTER_FORWARD_WANTED(outlink, inlink); > + } > + > + return FFERROR_NOT_READY; > } > > static int query_formats(AVFilterContext *ctx) > @@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = { > .name = "default", > .type = AVMEDIA_TYPE_VIDEO, > .config_props = config_input, > - .filter_frame = filter_frame, > }, > { NULL } > }; > @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = { > .uninit = vpp_uninit, > .inputs = vpp_inputs, > .outputs = vpp_outputs, > + .activate = activate, > .priv_class = &vpp_class, > .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, > }; Ping, thanks Fei
> -----Original Message----- > From: Wang, Fei W <fei.w.wang@intel.com> > Sent: Wednesday, February 3, 2021 9:09 AM > To: ffmpeg-devel@ffmpeg.org > Subject: Re: [PATCH v1] lavfi/qsvvpp: support async depth > > On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote: > > Async depth will allow qsv filter cache few frames, and avoid force > > switch and end filter task frame by frame. This change will improve > > performance for some multi-task case, for example 1:N transcode( > > decode + vpp + encode) with all QSV plugins. > > > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > > --- > > libavfilter/qsvvpp.c | 147 ++++++++++++++++++----------- > > -- > > libavfilter/qsvvpp.h | 42 ++++++++- > > libavfilter/vf_deinterlace_qsv.c | 8 -- > > libavfilter/vf_vpp_qsv.c | 75 +++++++++++++--- > > 4 files changed, 187 insertions(+), 85 deletions(-) > > > > diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index > > f216b3f248..2e824e67e7 100644 > > --- a/libavfilter/qsvvpp.c > > +++ b/libavfilter/qsvvpp.c > > @@ -27,6 +27,7 @@ > > #include "libavutil/hwcontext_qsv.h" > > #include "libavutil/time.h" > > #include "libavutil/pixdesc.h" > > +#include "libavutil/fifo.h" > > > > #include "internal.h" > > #include "qsvvpp.h" > > @@ -37,37 +38,6 @@ > > #define IS_OPAQUE_MEMORY(mode) (mode & > MFX_MEMTYPE_OPAQUE_FRAME) > > #define IS_SYSTEM_MEMORY(mode) (mode & > MFX_MEMTYPE_SYSTEM_MEMORY) > > > > -typedef struct QSVFrame { > > - AVFrame *frame; > > - mfxFrameSurface1 *surface; > > - mfxFrameSurface1 surface_internal; /* for system memory */ > > - struct QSVFrame *next; > > -} QSVFrame; > > - > > -/* abstract struct for all QSV filters */ -struct QSVVPPContext { > > - mfxSession session; > > - int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* > > callback */ > > - enum AVPixelFormat out_sw_format; /* Real output format */ > > - mfxVideoParam vpp_param; > > - mfxFrameInfo *frame_infos; /* frame info for each > > input */ > > - > > - /* members related to the input/output 
surface */ > > - int in_mem_mode; > > - int out_mem_mode; > > - QSVFrame *in_frame_list; > > - QSVFrame *out_frame_list; > > - int nb_surface_ptrs_in; > > - int nb_surface_ptrs_out; > > - mfxFrameSurface1 **surface_ptrs_in; > > - mfxFrameSurface1 **surface_ptrs_out; > > - > > - /* MFXVPP extern parameters */ > > - mfxExtOpaqueSurfaceAlloc opaque_alloc; > > - mfxExtBuffer **ext_buffers; > > - int nb_ext_buffers; > > -}; > > - > > static const mfxHandleType handle_types[] = { > > MFX_HANDLE_VA_DISPLAY, > > MFX_HANDLE_D3D9_DEVICE_MANAGER, > > @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo > > *frameinfo, AVFilterLink *link) > > static void clear_unused_frames(QSVFrame *list) { > > while (list) { > > - if (list->surface && !list->surface->Data.Locked) { > > - list->surface = NULL; > > + if (list->used && !list->queued && !list- > > >surface.Data.Locked) { > > av_frame_free(&list->frame); > > + list->used = 0; > > } > > list = list->next; > > } > > @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame **list) > > QSVFrame *out = *list; > > > > for (; out; out = out->next) { > > - if (!out->surface) > > + if (!out->used) { > > + out->used = 1; > > break; > > + } > > } > > > > if (!out) { > > @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame **list) > > av_log(NULL, AV_LOG_ERROR, "Can't alloc new output > > frame.\n"); > > return NULL; > > } > > + out->used = 1; > > out->next = *list; > > *list = out; > > } > > @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, > > AVFilterLink *inlink, AVFrame *p > > return NULL; > > } > > qsv_frame->frame = av_frame_clone(picref); > > - qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame- > > >data[3]; > > + qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame- > > >data[3]; > > } else { > > /* make a copy if the input is not padded as libmfx requires > > */ > > if (picref->height & 31 || picref->linesize[0] & 31) { @@ > > -425,27 +398,26 @@ static QSVFrame 
*submit_frame(QSVVPPContext *s, > > AVFilterLink *inlink, AVFrame *p > > qsv_frame->frame = av_frame_clone(picref); > > > > if (map_frame_to_surface(qsv_frame->frame, > > - &qsv_frame->surface_internal) < 0) { > > + &qsv_frame->surface) < 0) { > > av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n"); > > return NULL; > > } > > - qsv_frame->surface = &qsv_frame->surface_internal; > > } > > > > - qsv_frame->surface->Info = s- > > >frame_infos[FF_INLINK_IDX(inlink)]; > > - qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame- > > >frame->pts, > > + qsv_frame->surface.Info = s- > > >frame_infos[FF_INLINK_IDX(inlink)]; > > + qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame- > > >frame->pts, > > inlink- > > >time_base, default_tb); > > > > - qsv_frame->surface->Info.PicStruct = > > + qsv_frame->surface.Info.PicStruct = > > !qsv_frame->frame->interlaced_frame ? > > MFX_PICSTRUCT_PROGRESSIVE : > > (qsv_frame->frame->top_field_first ? > > MFX_PICSTRUCT_FIELD_TFF : > > MFX_PICSTRUCT_FIELD > > _BFF); > > if (qsv_frame->frame->repeat_pict == 1) > > - qsv_frame->surface->Info.PicStruct |= > > MFX_PICSTRUCT_FIELD_REPEATED; > > + qsv_frame->surface.Info.PicStruct |= > > MFX_PICSTRUCT_FIELD_REPEATED; > > else if (qsv_frame->frame->repeat_pict == 2) > > - qsv_frame->surface->Info.PicStruct |= > > MFX_PICSTRUCT_FRAME_DOUBLING; > > + qsv_frame->surface.Info.PicStruct |= > > MFX_PICSTRUCT_FRAME_DOUBLING; > > else if (qsv_frame->frame->repeat_pict == 4) > > - qsv_frame->surface->Info.PicStruct |= > > MFX_PICSTRUCT_FRAME_TRIPLING; > > + qsv_frame->surface.Info.PicStruct |= > > MFX_PICSTRUCT_FRAME_TRIPLING; > > > > return qsv_frame; > > } > > @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s, > > AVFilterLink *outlink) > > return NULL; > > } > > > > - out_frame->surface = (mfxFrameSurface1 *)out_frame->frame- > > >data[3]; > > + out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame- > > >data[3]; > > } else { > > /* Get a frame with aligned dimensions. 
> > * Libmfx need system memory being 128x64 aligned */ @@ > > -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext *s, > > AVFilterLink *outlink) > > out_frame->frame->height = outlink->h; > > > > ret = map_frame_to_surface(out_frame->frame, > > - &out_frame->surface_internal); > > + &out_frame->surface); > > if (ret < 0) > > return NULL; > > - > > - out_frame->surface = &out_frame->surface_internal; > > } > > > > - out_frame->surface->Info = s->vpp_param.vpp.Out; > > + out_frame->surface.Info = s->vpp_param.vpp.Out; > > > > return out_frame; > > } > > @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext > > *avctx, QSVVPPContext *s) > > return 0; > > } > > > > +static unsigned int qsv_fifo_item_size(void) { > > + return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*); } > > + > > +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo) { > > + return av_fifo_size(fifo)/qsv_fifo_item_size(); > > +} > > + > > int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, > > QSVVPPParam *param) { > > int i; > > @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx, > > QSVVPPContext **vpp, QSVVPPParam *p > > s->vpp_param.ExtParam = param->ext_buf; > > } > > > > - s->vpp_param.AsyncDepth = 1; > > + s->got_frame = 0; > > + > > + /** keep fifo size at least 1. Even when async_depth is 0, fifo > > is used. 
*/ > > + s->async_fifo = av_fifo_alloc((param->async_depth + 1) * > > qsv_fifo_item_size()); > > + s->async_depth = param->async_depth; > > + if (!s->async_fifo) { > > + ret = AVERROR(ENOMEM); > > + goto failed; > > + } > > + > > + s->vpp_param.AsyncDepth = param->async_depth; > > > > if (IS_SYSTEM_MEMORY(s->in_mem_mode)) > > s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY; > @@ > > -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp) > > av_freep(&s->surface_ptrs_out); > > av_freep(&s->ext_buffers); > > av_freep(&s->frame_infos); > > + av_fifo_free(s->async_fifo); > > av_freep(vpp); > > > > return 0; > > @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, > > AVFilterLink *inlink, AVFrame *picr > > AVFilterContext *ctx = inlink->dst; > > AVFilterLink *outlink = ctx->outputs[0]; > > mfxSyncPoint sync; > > - QSVFrame *in_frame, *out_frame; > > + QSVFrame *in_frame, *out_frame, *tmp; > > int ret, filter_ret; > > > > + while (s->eof && qsv_fifo_size(s->async_fifo)) { > > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), > > NULL); > > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), > > NULL); > > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) > > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > > + > > + filter_ret = s->filter_frame(outlink, tmp->frame); > > + if (filter_ret < 0) { > > + av_frame_free(&tmp->frame); > > + ret = filter_ret; > > + break; > > + } > > + tmp->queued = 0; > > + s->got_frame = 1; > > + tmp->frame = NULL; > > + }; > > + > > + if (!picref) > > + return 0; > > + > > in_frame = submit_frame(s, inlink, picref); > > if (!in_frame) { > > av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on > > input[%d]\n", @@ -821,8 +832,8 @@ int > > ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame > > *picr > > } > > > > do { > > - ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame- > > >surface, > > - out_frame->surface, > > NULL, &sync); > > + ret = 
MFXVideoVPP_RunFrameVPPAsync(s->session, > > &in_frame->surface, > > + &out_frame->surface, > > NULL, &sync); > > if (ret == MFX_WRN_DEVICE_BUSY) > > av_usleep(500); > > } while (ret == MFX_WRN_DEVICE_BUSY); @@ -833,20 +844,32 @@ > > int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, > > AVFrame *picr > > ret = AVERROR(EAGAIN); > > break; > > } > > + out_frame->frame->pts = av_rescale_q(out_frame- > > >surface.Data.TimeStamp, > > + default_tb, outlink- > > >time_base); > > > > - if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) > > - av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > > + out_frame->queued = 1; > > + av_fifo_generic_write(s->async_fifo, &out_frame, > > sizeof(out_frame), NULL); > > + av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), > > NULL); > > > > - out_frame->frame->pts = av_rescale_q(out_frame->surface- > > >Data.TimeStamp, > > - default_tb, outlink- > > >time_base); > > > > - filter_ret = s->filter_frame(outlink, out_frame->frame); > > - if (filter_ret < 0) { > > - av_frame_free(&out_frame->frame); > > - ret = filter_ret; > > - break; > > + if (qsv_fifo_size(s->async_fifo) > s->async_depth) { > > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), > > NULL); > > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), > > NULL); > > + > > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < > > 0) > > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > > + > > + filter_ret = s->filter_frame(outlink, tmp->frame); > > + if (filter_ret < 0) { > > + av_frame_free(&tmp->frame); > > + ret = filter_ret; > > + break; > > + } > > + > > + tmp->queued = 0; > > + s->got_frame = 1; > > + tmp->frame = NULL; > > } > > - out_frame->frame = NULL; > > } while(ret == MFX_ERR_MORE_SURFACE); > > > > return ret; > > diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h index > > b4baeedf9e..48c8ffc2d2 100644 > > --- a/libavfilter/qsvvpp.h > > +++ b/libavfilter/qsvvpp.h > > @@ -27,6 +27,7 @@ > > #include 
<mfx/mfxvideo.h> > > > > #include "avfilter.h" > > +#include "libavutil/fifo.h" > > > > #define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst- > > >input_pads)) > > #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src- > > >output_pads)) > > @@ -39,7 +40,44 @@ > > ((MFX_VERSION.Major > (MAJOR)) || \ > > (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR))) > > > > -typedef struct QSVVPPContext QSVVPPContext; > > +#define VPP_ASYNC_DEPTH_DEFAULT 1 > > + > > +typedef struct QSVFrame { > > + AVFrame *frame; > > + mfxFrameSurface1 surface; > > + struct QSVFrame *next; > > + int queued; > > + int used; > > +} QSVFrame; > > + > > +typedef struct QSVVPPContext { > > + mfxSession session; > > + int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); > > /**< callback */ > > + enum AVPixelFormat out_sw_format; /**< Real output format */ > > + mfxVideoParam vpp_param; > > + mfxFrameInfo *frame_infos; /**< frame info for each > > input */ > > + > > + /** members related to the input/output surface */ > > + int in_mem_mode; > > + int out_mem_mode; > > + QSVFrame *in_frame_list; > > + QSVFrame *out_frame_list; > > + int nb_surface_ptrs_in; > > + int nb_surface_ptrs_out; > > + mfxFrameSurface1 **surface_ptrs_in; > > + mfxFrameSurface1 **surface_ptrs_out; > > + > > + /** MFXVPP extern parameters */ > > + mfxExtOpaqueSurfaceAlloc opaque_alloc; > > + mfxExtBuffer **ext_buffers; > > + int nb_ext_buffers; > > + > > + int got_frame; > > + int async_depth; > > + int eof; > > + /** order with frame_out, sync */ > > + AVFifoBuffer *async_fifo; > > +} QSVVPPContext; > > > > typedef struct QSVVPPCrop { > > int in_idx; ///< Input index > > @@ -60,6 +98,8 @@ typedef struct QSVVPPParam { > > /* Crop information for each input, if needed */ > > int num_crop; > > QSVVPPCrop *crop; > > + > > + int async_depth; > > } QSVVPPParam; > > > > /* create and initialize the QSV session */ diff --git > > a/libavfilter/vf_deinterlace_qsv.c > > 
b/libavfilter/vf_deinterlace_qsv.c > > index 89a282f99e..a620567de2 100644 > > --- a/libavfilter/vf_deinterlace_qsv.c > > +++ b/libavfilter/vf_deinterlace_qsv.c > > @@ -47,14 +47,6 @@ enum { > > QSVDEINT_MORE_INPUT, > > }; > > > > -typedef struct QSVFrame { > > - AVFrame *frame; > > - mfxFrameSurface1 surface; > > - int used; > > - > > - struct QSVFrame *next; > > -} QSVFrame; > > - > > typedef struct QSVDeintContext { > > const AVClass *class; > > > > diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c index > > 5d57707455..83bdf1276c 100644 > > --- a/libavfilter/vf_vpp_qsv.c > > +++ b/libavfilter/vf_vpp_qsv.c > > @@ -32,6 +32,7 @@ > > #include "formats.h" > > #include "internal.h" > > #include "avfilter.h" > > +#include "filters.h" > > #include "libavcodec/avcodec.h" > > #include "libavformat/avformat.h" > > > > @@ -93,6 +94,9 @@ typedef struct VPPContext{ > > char *cx, *cy, *cw, *ch; > > char *ow, *oh; > > char *output_format_str; > > + > > + int async_depth; > > + int eof; > > } VPPContext; > > > > static const AVOption options[] = { > > @@ -128,6 +132,7 @@ static const AVOption options[] = { > > { "h", "Output video height", OFFSET(oh), > > AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, > > { "height", "Output video height", OFFSET(oh), > > AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, > > { "format", "Output pixel format", OFFSET(output_format_str), > > AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, > > + { "async_depth", "Internal parallelization depth, the higher the > > value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, > > { .i64 = VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS }, > > > > { NULL } > > }; > > @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink) > > param.filter_frame = NULL; > > param.num_ext_buf = 0; > > param.ext_buf = ext_buf; > > + param.async_depth = vpp->async_depth; > > > > if (inlink->format == AV_PIX_FMT_QSV) { > > if 
(!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data) > > @@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink) > > return 0; > > } > > > > -static int filter_frame(AVFilterLink *inlink, AVFrame *picref) > > +static int activate(AVFilterContext *ctx) > > { > > - int ret = 0; > > - AVFilterContext *ctx = inlink->dst; > > - VPPContext *vpp = inlink->dst->priv; > > - AVFilterLink *outlink = ctx->outputs[0]; > > - > > - if (vpp->qsv) { > > - ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); > > - av_frame_free(&picref); > > + AVFilterLink *inlink = ctx->inputs[0]; > > + AVFilterLink *outlink = ctx->outputs[0]; > > + VPPContext *s =ctx->priv; > > + QSVVPPContext *qsv = s->qsv; > > + AVFrame *in = NULL; > > + int ret, status; > > + int64_t pts; > > + > > + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); > > + > > + if (!s->eof) { > > + ret = ff_inlink_consume_frame(inlink, &in); > > + if (ret < 0) > > + return ret; > > + > > + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { > > + if (status == AVERROR_EOF) { > > + s->eof = 1; > > + } > > + } > > + } > > + > > + if (qsv) { > > + if (in || s->eof) { > > + qsv->eof = s->eof; > > + ret = ff_qsvvpp_filter_frame(qsv, inlink, in); > > + av_frame_free(&in); > > + > > + if (s->eof) { > > + ff_outlink_set_status(outlink, status, pts); > > + return 0; > > + } > > + > > + if (qsv->got_frame) { > > + qsv->got_frame = 0; > > + return ret; > > + } > > + } > > } else { > > - if (picref->pts != AV_NOPTS_VALUE) > > - picref->pts = av_rescale_q(picref->pts, inlink- > > >time_base, outlink->time_base); > > - ret = ff_filter_frame(outlink, picref); > > + if (in) { > > + if (in->pts != AV_NOPTS_VALUE) > > + in->pts = av_rescale_q(in->pts, inlink->time_base, > > outlink->time_base); > > + > > + ret = ff_filter_frame(outlink, in); > > + return ret; > > + } > > } > > > > - return ret; > > + if (s->eof) { > > + ff_outlink_set_status(outlink, status, pts); > > + return 0; > > + } else { > > + 
FF_FILTER_FORWARD_WANTED(outlink, inlink); > > + } > > + > > + return FFERROR_NOT_READY; > > } > > > > static int query_formats(AVFilterContext *ctx) @@ -531,7 +578,6 @@ > > static const AVFilterPad vpp_inputs[] = { > > .name = "default", > > .type = AVMEDIA_TYPE_VIDEO, > > .config_props = config_input, > > - .filter_frame = filter_frame, > > }, > > { NULL } > > }; > > @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = { > > .uninit = vpp_uninit, > > .inputs = vpp_inputs, > > .outputs = vpp_outputs, > > + .activate = activate, > > .priv_class = &vpp_class, > > .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, }; > > Ping, thanks. Ping for review. @Mark Thompson, @lizhong1008@gmail.com: are you free to review this patch? > > Fei
On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote: > Async depth will allow qsv filter cache few frames, and avoid force > switch and end filter task frame by frame. This change will improve > performance for some multi-task case, for example 1:N transcode( > decode + vpp + encode) with all QSV plugins. > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > --- > libavfilter/qsvvpp.c | 147 ++++++++++++++++++------------- > libavfilter/qsvvpp.h | 42 ++++++++- > libavfilter/vf_deinterlace_qsv.c | 8 -- > libavfilter/vf_vpp_qsv.c | 75 +++++++++++++--- > 4 files changed, 187 insertions(+), 85 deletions(-) > > diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c > index f216b3f248..2e824e67e7 100644 > --- a/libavfilter/qsvvpp.c > +++ b/libavfilter/qsvvpp.c > @@ -27,6 +27,7 @@ > #include "libavutil/hwcontext_qsv.h" > #include "libavutil/time.h" > #include "libavutil/pixdesc.h" > +#include "libavutil/fifo.h" > > #include "internal.h" > #include "qsvvpp.h" > @@ -37,37 +38,6 @@ > #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME) > #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY) > > -typedef struct QSVFrame { > - AVFrame *frame; > - mfxFrameSurface1 *surface; > - mfxFrameSurface1 surface_internal; /* for system memory */ > - struct QSVFrame *next; > -} QSVFrame; > - > -/* abstract struct for all QSV filters */ > -struct QSVVPPContext { > - mfxSession session; > - int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback > */ > - enum AVPixelFormat out_sw_format; /* Real output format */ > - mfxVideoParam vpp_param; > - mfxFrameInfo *frame_infos; /* frame info for each input */ > - > - /* members related to the input/output surface */ > - int in_mem_mode; > - int out_mem_mode; > - QSVFrame *in_frame_list; > - QSVFrame *out_frame_list; > - int nb_surface_ptrs_in; > - int nb_surface_ptrs_out; > - mfxFrameSurface1 **surface_ptrs_in; > - mfxFrameSurface1 **surface_ptrs_out; > - > - /* MFXVPP extern parameters */ > - 
mfxExtOpaqueSurfaceAlloc opaque_alloc; > - mfxExtBuffer **ext_buffers; > - int nb_ext_buffers; > -}; > - > static const mfxHandleType handle_types[] = { > MFX_HANDLE_VA_DISPLAY, > MFX_HANDLE_D3D9_DEVICE_MANAGER, > @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, > AVFilterLink *link) > static void clear_unused_frames(QSVFrame *list) > { > while (list) { > - if (list->surface && !list->surface->Data.Locked) { > - list->surface = NULL; > + if (list->used && !list->queued && !list->surface.Data.Locked) { > av_frame_free(&list->frame); > + list->used = 0; > } > list = list->next; > } > @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame **list) > QSVFrame *out = *list; > > for (; out; out = out->next) { > - if (!out->surface) > + if (!out->used) { > + out->used = 1; > break; > + } > } > > if (!out) { > @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame **list) > av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n"); > return NULL; > } > + out->used = 1; > out->next = *list; > *list = out; > } > @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, > AVFilterLink *inlink, AVFrame *p > return NULL; > } > qsv_frame->frame = av_frame_clone(picref); > - qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3]; > + qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3]; > } else { > /* make a copy if the input is not padded as libmfx requires */ > if (picref->height & 31 || picref->linesize[0] & 31) { > @@ -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s, > AVFilterLink *inlink, AVFrame *p > qsv_frame->frame = av_frame_clone(picref); > > if (map_frame_to_surface(qsv_frame->frame, > - &qsv_frame->surface_internal) < 0) { > + &qsv_frame->surface) < 0) { > av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n"); > return NULL; > } > - qsv_frame->surface = &qsv_frame->surface_internal; > } > > - qsv_frame->surface->Info = s- > >frame_infos[FF_INLINK_IDX(inlink)]; > - 
qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts, > + qsv_frame->surface.Info = s- > >frame_infos[FF_INLINK_IDX(inlink)]; > + qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts, > inlink->time_base, > default_tb); > > - qsv_frame->surface->Info.PicStruct = > + qsv_frame->surface.Info.PicStruct = > !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE : > (qsv_frame->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF : > MFX_PICSTRUCT_FIELD_BFF); > if (qsv_frame->frame->repeat_pict == 1) > - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; > + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; > else if (qsv_frame->frame->repeat_pict == 2) > - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; > + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; > else if (qsv_frame->frame->repeat_pict == 4) > - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; > + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; > > return qsv_frame; > } > @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s, > AVFilterLink *outlink) > return NULL; > } > > - out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3]; > + out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3]; > } else { > /* Get a frame with aligned dimensions. 
> * Libmfx need system memory being 128x64 aligned */ > @@ -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext *s, > AVFilterLink *outlink) > out_frame->frame->height = outlink->h; > > ret = map_frame_to_surface(out_frame->frame, > - &out_frame->surface_internal); > + &out_frame->surface); > if (ret < 0) > return NULL; > - > - out_frame->surface = &out_frame->surface_internal; > } > > - out_frame->surface->Info = s->vpp_param.vpp.Out; > + out_frame->surface.Info = s->vpp_param.vpp.Out; > > return out_frame; > } > @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext *avctx, > QSVVPPContext *s) > return 0; > } > > +static unsigned int qsv_fifo_item_size(void) > +{ > + return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*); > +} > + > +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo) > +{ > + return av_fifo_size(fifo)/qsv_fifo_item_size(); > +} > + > int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam > *param) > { > int i; > @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx, > QSVVPPContext **vpp, QSVVPPParam *p > s->vpp_param.ExtParam = param->ext_buf; > } > > - s->vpp_param.AsyncDepth = 1; > + s->got_frame = 0; > + > + /** keep fifo size at least 1. Even when async_depth is 0, fifo is used. 
> */ > + s->async_fifo = av_fifo_alloc((param->async_depth + 1) * > qsv_fifo_item_size()); > + s->async_depth = param->async_depth; > + if (!s->async_fifo) { > + ret = AVERROR(ENOMEM); > + goto failed; > + } > + > + s->vpp_param.AsyncDepth = param->async_depth; > > if (IS_SYSTEM_MEMORY(s->in_mem_mode)) > s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY; > @@ -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp) > av_freep(&s->surface_ptrs_out); > av_freep(&s->ext_buffers); > av_freep(&s->frame_infos); > + av_fifo_free(s->async_fifo); > av_freep(vpp); > > return 0; > @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink > *inlink, AVFrame *picr > AVFilterContext *ctx = inlink->dst; > AVFilterLink *outlink = ctx->outputs[0]; > mfxSyncPoint sync; > - QSVFrame *in_frame, *out_frame; > + QSVFrame *in_frame, *out_frame, *tmp; > int ret, filter_ret; > > + while (s->eof && qsv_fifo_size(s->async_fifo)) { > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL); > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL); > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > + > + filter_ret = s->filter_frame(outlink, tmp->frame); > + if (filter_ret < 0) { > + av_frame_free(&tmp->frame); > + ret = filter_ret; > + break; > + } > + tmp->queued = 0; > + s->got_frame = 1; > + tmp->frame = NULL; > + }; > + > + if (!picref) > + return 0; > + > in_frame = submit_frame(s, inlink, picref); > if (!in_frame) { > av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input[%d]\n", > @@ -821,8 +832,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink > *inlink, AVFrame *picr > } > > do { > - ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface, > - out_frame->surface, NULL, > &sync); > + ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame- > >surface, > + &out_frame->surface, NULL, > &sync); > if (ret == MFX_WRN_DEVICE_BUSY) > av_usleep(500); > } 
while (ret == MFX_WRN_DEVICE_BUSY); > @@ -833,20 +844,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, > AVFilterLink *inlink, AVFrame *picr > ret = AVERROR(EAGAIN); > break; > } > + out_frame->frame->pts = av_rescale_q(out_frame- > >surface.Data.TimeStamp, > + default_tb, outlink->time_base); > > - if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) > - av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > + out_frame->queued = 1; > + av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame), > NULL); > + av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL); > > - out_frame->frame->pts = av_rescale_q(out_frame->surface- > >Data.TimeStamp, > - default_tb, outlink->time_base); > > - filter_ret = s->filter_frame(outlink, out_frame->frame); > - if (filter_ret < 0) { > - av_frame_free(&out_frame->frame); > - ret = filter_ret; > - break; > + if (qsv_fifo_size(s->async_fifo) > s->async_depth) { > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL); > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL); > + > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > + > + filter_ret = s->filter_frame(outlink, tmp->frame); > + if (filter_ret < 0) { > + av_frame_free(&tmp->frame); > + ret = filter_ret; > + break; > + } > + > + tmp->queued = 0; > + s->got_frame = 1; > + tmp->frame = NULL; > } > - out_frame->frame = NULL; > } while(ret == MFX_ERR_MORE_SURFACE); > > return ret; > diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h > index b4baeedf9e..48c8ffc2d2 100644 > --- a/libavfilter/qsvvpp.h > +++ b/libavfilter/qsvvpp.h > @@ -27,6 +27,7 @@ > #include <mfx/mfxvideo.h> > > #include "avfilter.h" > +#include "libavutil/fifo.h" > > #define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst- > >input_pads)) > #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src- > >output_pads)) > @@ -39,7 +40,44 @@ > ((MFX_VERSION.Major > (MAJOR)) || \ > 
(MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR))) > > -typedef struct QSVVPPContext QSVVPPContext; > +#define VPP_ASYNC_DEPTH_DEFAULT 1 > + > +typedef struct QSVFrame { > + AVFrame *frame; > + mfxFrameSurface1 surface; > + struct QSVFrame *next; > + int queued; > + int used; May we use a queue count, as Wenbin did for the qsv decoder in http://ffmpeg.org/pipermail/ffmpeg-devel/2021-March/277633.html ? If so, I think we may not need the 'used' member, and the logic will be simpler. Thanks Haihao > +} QSVFrame; > + > +typedef struct QSVVPPContext { > + mfxSession session; > + int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**< > callback */ > + enum AVPixelFormat out_sw_format; /**< Real output format */ > + mfxVideoParam vpp_param; > + mfxFrameInfo *frame_infos; /**< frame info for each input */ > + > + /** members related to the input/output surface */ > + int in_mem_mode; > + int out_mem_mode; > + QSVFrame *in_frame_list; > + QSVFrame *out_frame_list; > + int nb_surface_ptrs_in; > + int nb_surface_ptrs_out; > + mfxFrameSurface1 **surface_ptrs_in; > + mfxFrameSurface1 **surface_ptrs_out; > + > + /** MFXVPP extern parameters */ > + mfxExtOpaqueSurfaceAlloc opaque_alloc; > + mfxExtBuffer **ext_buffers; > + int nb_ext_buffers; > + > + int got_frame; > + int async_depth; > + int eof; > + /** order with frame_out, sync */ > + AVFifoBuffer *async_fifo; > +} QSVVPPContext; > > typedef struct QSVVPPCrop { > int in_idx; ///< Input index > @@ -60,6 +98,8 @@ typedef struct QSVVPPParam { > /* Crop information for each input, if needed */ > int num_crop; > QSVVPPCrop *crop; > + > + int async_depth; > } QSVVPPParam; > > /* create and initialize the QSV session */ > diff --git a/libavfilter/vf_deinterlace_qsv.c > b/libavfilter/vf_deinterlace_qsv.c > index 89a282f99e..a620567de2 100644 > --- a/libavfilter/vf_deinterlace_qsv.c > +++ b/libavfilter/vf_deinterlace_qsv.c > @@ -47,14 +47,6 @@ enum { > QSVDEINT_MORE_INPUT, > }; > > -typedef struct QSVFrame { > - 
AVFrame *frame; > - mfxFrameSurface1 surface; > - int used; > - > - struct QSVFrame *next; > -} QSVFrame; > - > typedef struct QSVDeintContext { > const AVClass *class; > > diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c > index 5d57707455..83bdf1276c 100644 > --- a/libavfilter/vf_vpp_qsv.c > +++ b/libavfilter/vf_vpp_qsv.c > @@ -32,6 +32,7 @@ > #include "formats.h" > #include "internal.h" > #include "avfilter.h" > +#include "filters.h" > #include "libavcodec/avcodec.h" > #include "libavformat/avformat.h" > > @@ -93,6 +94,9 @@ typedef struct VPPContext{ > char *cx, *cy, *cw, *ch; > char *ow, *oh; > char *output_format_str; > + > + int async_depth; > + int eof; > } VPPContext; > > static const AVOption options[] = { > @@ -128,6 +132,7 @@ static const AVOption options[] = { > { "h", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { > .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, > { "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { > .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, > { "format", "Output pixel format", OFFSET(output_format_str), > AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, > + { "async_depth", "Internal parallelization depth, the higher the value > the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = > VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS }, > > { NULL } > }; > @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink) > param.filter_frame = NULL; > param.num_ext_buf = 0; > param.ext_buf = ext_buf; > + param.async_depth = vpp->async_depth; > > if (inlink->format == AV_PIX_FMT_QSV) { > if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data) > @@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink) > return 0; > } > > -static int filter_frame(AVFilterLink *inlink, AVFrame *picref) > +static int activate(AVFilterContext *ctx) > { > - int ret = 0; > - AVFilterContext *ctx = inlink->dst; > - VPPContext *vpp = inlink->dst->priv; > - AVFilterLink 
*outlink = ctx->outputs[0]; > - > - if (vpp->qsv) { > - ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); > - av_frame_free(&picref); > + AVFilterLink *inlink = ctx->inputs[0]; > + AVFilterLink *outlink = ctx->outputs[0]; > + VPPContext *s =ctx->priv; > + QSVVPPContext *qsv = s->qsv; > + AVFrame *in = NULL; > + int ret, status; > + int64_t pts; > + > + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); > + > + if (!s->eof) { > + ret = ff_inlink_consume_frame(inlink, &in); > + if (ret < 0) > + return ret; > + > + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { > + if (status == AVERROR_EOF) { > + s->eof = 1; > + } > + } > + } > + > + if (qsv) { > + if (in || s->eof) { > + qsv->eof = s->eof; > + ret = ff_qsvvpp_filter_frame(qsv, inlink, in); > + av_frame_free(&in); > + > + if (s->eof) { > + ff_outlink_set_status(outlink, status, pts); > + return 0; > + } > + > + if (qsv->got_frame) { > + qsv->got_frame = 0; > + return ret; > + } > + } > } else { > - if (picref->pts != AV_NOPTS_VALUE) > - picref->pts = av_rescale_q(picref->pts, inlink->time_base, > outlink->time_base); > - ret = ff_filter_frame(outlink, picref); > + if (in) { > + if (in->pts != AV_NOPTS_VALUE) > + in->pts = av_rescale_q(in->pts, inlink->time_base, outlink- > >time_base); > + > + ret = ff_filter_frame(outlink, in); > + return ret; > + } > } > > - return ret; > + if (s->eof) { > + ff_outlink_set_status(outlink, status, pts); > + return 0; > + } else { > + FF_FILTER_FORWARD_WANTED(outlink, inlink); > + } > + > + return FFERROR_NOT_READY; > } > > static int query_formats(AVFilterContext *ctx) > @@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = { > .name = "default", > .type = AVMEDIA_TYPE_VIDEO, > .config_props = config_input, > - .filter_frame = filter_frame, > }, > { NULL } > }; > @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = { > .uninit = vpp_uninit, > .inputs = vpp_inputs, > .outputs = vpp_outputs, > + .activate = activate, > .priv_class = &vpp_class, > .flags_internal = 
FF_FILTER_FLAG_HWFRAME_AWARE, > };
On Fri, 2021-03-12 at 06:20 +0000, Xiang, Haihao wrote: > On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote: > > Async depth will allow qsv filter cache few frames, and avoid force > > switch and end filter task frame by frame. This change will improve > > performance for some multi-task case, for example 1:N transcode( > > decode + vpp + encode) with all QSV plugins. > > > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > > --- > > libavfilter/qsvvpp.c | 147 ++++++++++++++++++--------- > > ---- > > libavfilter/qsvvpp.h | 42 ++++++++- > > libavfilter/vf_deinterlace_qsv.c | 8 -- > > libavfilter/vf_vpp_qsv.c | 75 +++++++++++++--- > > 4 files changed, 187 insertions(+), 85 deletions(-) > > > > diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c > > index f216b3f248..2e824e67e7 100644 > > --- a/libavfilter/qsvvpp.c > > +++ b/libavfilter/qsvvpp.c > > @@ -27,6 +27,7 @@ > > #include "libavutil/hwcontext_qsv.h" > > #include "libavutil/time.h" > > #include "libavutil/pixdesc.h" > > +#include "libavutil/fifo.h" > > > > #include "internal.h" > > #include "qsvvpp.h" > > @@ -37,37 +38,6 @@ > > #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME) > > #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY) > > > > -typedef struct QSVFrame { > > - AVFrame *frame; > > - mfxFrameSurface1 *surface; > > - mfxFrameSurface1 surface_internal; /* for system memory */ > > - struct QSVFrame *next; > > -} QSVFrame; > > - > > -/* abstract struct for all QSV filters */ > > -struct QSVVPPContext { > > - mfxSession session; > > - int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* > > callback > > */ > > - enum AVPixelFormat out_sw_format; /* Real output format */ > > - mfxVideoParam vpp_param; > > - mfxFrameInfo *frame_infos; /* frame info for each > > input */ > > - > > - /* members related to the input/output surface */ > > - int in_mem_mode; > > - int out_mem_mode; > > - QSVFrame *in_frame_list; > > - QSVFrame *out_frame_list; > > - int 
nb_surface_ptrs_in; > > - int nb_surface_ptrs_out; > > - mfxFrameSurface1 **surface_ptrs_in; > > - mfxFrameSurface1 **surface_ptrs_out; > > - > > - /* MFXVPP extern parameters */ > > - mfxExtOpaqueSurfaceAlloc opaque_alloc; > > - mfxExtBuffer **ext_buffers; > > - int nb_ext_buffers; > > -}; > > - > > static const mfxHandleType handle_types[] = { > > MFX_HANDLE_VA_DISPLAY, > > MFX_HANDLE_D3D9_DEVICE_MANAGER, > > @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo > > *frameinfo, > > AVFilterLink *link) > > static void clear_unused_frames(QSVFrame *list) > > { > > while (list) { > > - if (list->surface && !list->surface->Data.Locked) { > > - list->surface = NULL; > > + if (list->used && !list->queued && !list- > > >surface.Data.Locked) { > > av_frame_free(&list->frame); > > + list->used = 0; > > } > > list = list->next; > > } > > @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame > > **list) > > QSVFrame *out = *list; > > > > for (; out; out = out->next) { > > - if (!out->surface) > > + if (!out->used) { > > + out->used = 1; > > break; > > + } > > } > > > > if (!out) { > > @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame > > **list) > > av_log(NULL, AV_LOG_ERROR, "Can't alloc new output > > frame.\n"); > > return NULL; > > } > > + out->used = 1; > > out->next = *list; > > *list = out; > > } > > @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, > > AVFilterLink *inlink, AVFrame *p > > return NULL; > > } > > qsv_frame->frame = av_frame_clone(picref); > > - qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame- > > >data[3]; > > + qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame- > > >frame->data[3]; > > } else { > > /* make a copy if the input is not padded as libmfx > > requires */ > > if (picref->height & 31 || picref->linesize[0] & 31) { > > @@ -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext > > *s, > > AVFilterLink *inlink, AVFrame *p > > qsv_frame->frame = av_frame_clone(picref); > > > > 
if (map_frame_to_surface(qsv_frame->frame, > > - &qsv_frame->surface_internal) < 0) > > { > > + &qsv_frame->surface) < 0) { > > av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n"); > > return NULL; > > } > > - qsv_frame->surface = &qsv_frame->surface_internal; > > } > > > > - qsv_frame->surface->Info = s- > > > frame_infos[FF_INLINK_IDX(inlink)]; > > > > - qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame- > > >frame->pts, > > + qsv_frame->surface.Info = s- > > > frame_infos[FF_INLINK_IDX(inlink)]; > > > > + qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame- > > >frame->pts, > > inlink- > > >time_base, > > default_tb); > > > > - qsv_frame->surface->Info.PicStruct = > > + qsv_frame->surface.Info.PicStruct = > > !qsv_frame->frame->interlaced_frame ? > > MFX_PICSTRUCT_PROGRESSIVE : > > (qsv_frame->frame->top_field_first ? > > MFX_PICSTRUCT_FIELD_TFF : > > MFX_PICSTRUCT_FIE > > LD_BFF); > > if (qsv_frame->frame->repeat_pict == 1) > > - qsv_frame->surface->Info.PicStruct |= > > MFX_PICSTRUCT_FIELD_REPEATED; > > + qsv_frame->surface.Info.PicStruct |= > > MFX_PICSTRUCT_FIELD_REPEATED; > > else if (qsv_frame->frame->repeat_pict == 2) > > - qsv_frame->surface->Info.PicStruct |= > > MFX_PICSTRUCT_FRAME_DOUBLING; > > + qsv_frame->surface.Info.PicStruct |= > > MFX_PICSTRUCT_FRAME_DOUBLING; > > else if (qsv_frame->frame->repeat_pict == 4) > > - qsv_frame->surface->Info.PicStruct |= > > MFX_PICSTRUCT_FRAME_TRIPLING; > > + qsv_frame->surface.Info.PicStruct |= > > MFX_PICSTRUCT_FRAME_TRIPLING; > > > > return qsv_frame; > > } > > @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s, > > AVFilterLink *outlink) > > return NULL; > > } > > > > - out_frame->surface = (mfxFrameSurface1 *)out_frame->frame- > > >data[3]; > > + out_frame->surface = *(mfxFrameSurface1 *)out_frame- > > >frame->data[3]; > > } else { > > /* Get a frame with aligned dimensions. 
> > * Libmfx need system memory being 128x64 aligned */ > > @@ -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext > > *s, > > AVFilterLink *outlink) > > out_frame->frame->height = outlink->h; > > > > ret = map_frame_to_surface(out_frame->frame, > > - &out_frame->surface_internal); > > + &out_frame->surface); > > if (ret < 0) > > return NULL; > > - > > - out_frame->surface = &out_frame->surface_internal; > > } > > > > - out_frame->surface->Info = s->vpp_param.vpp.Out; > > + out_frame->surface.Info = s->vpp_param.vpp.Out; > > > > return out_frame; > > } > > @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext > > *avctx, > > QSVVPPContext *s) > > return 0; > > } > > > > +static unsigned int qsv_fifo_item_size(void) > > +{ > > + return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*); > > +} > > + > > +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo) > > +{ > > + return av_fifo_size(fifo)/qsv_fifo_item_size(); > > +} > > + > > int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, > > QSVVPPParam > > *param) > > { > > int i; > > @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx, > > QSVVPPContext **vpp, QSVVPPParam *p > > s->vpp_param.ExtParam = param->ext_buf; > > } > > > > - s->vpp_param.AsyncDepth = 1; > > + s->got_frame = 0; > > + > > + /** keep fifo size at least 1. Even when async_depth is 0, > > fifo is used. 
> > */ > > + s->async_fifo = av_fifo_alloc((param->async_depth + 1) * > > qsv_fifo_item_size()); > > + s->async_depth = param->async_depth; > > + if (!s->async_fifo) { > > + ret = AVERROR(ENOMEM); > > + goto failed; > > + } > > + > > + s->vpp_param.AsyncDepth = param->async_depth; > > > > if (IS_SYSTEM_MEMORY(s->in_mem_mode)) > > s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY; > > @@ -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp) > > av_freep(&s->surface_ptrs_out); > > av_freep(&s->ext_buffers); > > av_freep(&s->frame_infos); > > + av_fifo_free(s->async_fifo); > > av_freep(vpp); > > > > return 0; > > @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, > > AVFilterLink > > *inlink, AVFrame *picr > > AVFilterContext *ctx = inlink->dst; > > AVFilterLink *outlink = ctx->outputs[0]; > > mfxSyncPoint sync; > > - QSVFrame *in_frame, *out_frame; > > + QSVFrame *in_frame, *out_frame, *tmp; > > int ret, filter_ret; > > > > + while (s->eof && qsv_fifo_size(s->async_fifo)) { > > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), > > NULL); > > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), > > NULL); > > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < > > 0) > > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > > + > > + filter_ret = s->filter_frame(outlink, tmp->frame); > > + if (filter_ret < 0) { > > + av_frame_free(&tmp->frame); > > + ret = filter_ret; > > + break; > > + } > > + tmp->queued = 0; > > + s->got_frame = 1; > > + tmp->frame = NULL; > > + }; > > + > > + if (!picref) > > + return 0; > > + > > in_frame = submit_frame(s, inlink, picref); > > if (!in_frame) { > > av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on > > input[%d]\n", > > @@ -821,8 +832,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, > > AVFilterLink > > *inlink, AVFrame *picr > > } > > > > do { > > - ret = MFXVideoVPP_RunFrameVPPAsync(s->session, > > in_frame->surface, > > - out_frame->surface, > > NULL, > > &sync); > > + ret = 
MFXVideoVPP_RunFrameVPPAsync(s->session, > > &in_frame- > > > surface, > > > > + &out_frame- > > >surface, NULL, > > &sync); > > if (ret == MFX_WRN_DEVICE_BUSY) > > av_usleep(500); > > } while (ret == MFX_WRN_DEVICE_BUSY); > > @@ -833,20 +844,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, > > AVFilterLink *inlink, AVFrame *picr > > ret = AVERROR(EAGAIN); > > break; > > } > > + out_frame->frame->pts = av_rescale_q(out_frame- > > > surface.Data.TimeStamp, > > > > + default_tb, outlink- > > >time_base); > > > > - if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < > > 0) > > - av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > > + out_frame->queued = 1; > > + av_fifo_generic_write(s->async_fifo, &out_frame, > > sizeof(out_frame), > > NULL); > > + av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), > > NULL); > > > > - out_frame->frame->pts = av_rescale_q(out_frame->surface- > > > Data.TimeStamp, > > > > - default_tb, outlink- > > >time_base); > > > > - filter_ret = s->filter_frame(outlink, out_frame->frame); > > - if (filter_ret < 0) { > > - av_frame_free(&out_frame->frame); > > - ret = filter_ret; > > - break; > > + if (qsv_fifo_size(s->async_fifo) > s->async_depth) { > > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), > > NULL); > > + av_fifo_generic_read(s->async_fifo, &sync, > > sizeof(sync), NULL); > > + > > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) > > < 0) > > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); > > + > > + filter_ret = s->filter_frame(outlink, tmp->frame); > > + if (filter_ret < 0) { > > + av_frame_free(&tmp->frame); > > + ret = filter_ret; > > + break; > > + } > > + > > + tmp->queued = 0; > > + s->got_frame = 1; > > + tmp->frame = NULL; > > } > > - out_frame->frame = NULL; > > } while(ret == MFX_ERR_MORE_SURFACE); > > > > return ret; > > diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h > > index b4baeedf9e..48c8ffc2d2 100644 > > --- a/libavfilter/qsvvpp.h > > +++ b/libavfilter/qsvvpp.h > > @@ -27,6 
+27,7 @@ > > #include <mfx/mfxvideo.h> > > > > #include "avfilter.h" > > +#include "libavutil/fifo.h" > > > > #define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst- > > > input_pads)) > > > > #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src- > > > output_pads)) > > > > @@ -39,7 +40,44 @@ > > ((MFX_VERSION.Major > (MAJOR)) || \ > > (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= > > (MINOR))) > > > > -typedef struct QSVVPPContext QSVVPPContext; > > +#define VPP_ASYNC_DEPTH_DEFAULT 1 > > + > > +typedef struct QSVFrame { > > + AVFrame *frame; > > + mfxFrameSurface1 surface; > > + struct QSVFrame *next; > > + int queued; > > + int used; > > May we use queue count as what wenbin did for qsv decoder in > http://ffmpeg.org/pipermail/ffmpeg-devel/2021-March/277633.html ? If > so, I think > we may not use the member of 'used' and the logic will besimple. > > Thanks > Haihao It's a better choice. I will try to see if it is possible to combine them into one. Thanks for reviewing. 
Fei > > > > +} QSVFrame; > > + > > +typedef struct QSVVPPContext { > > + mfxSession session; > > + int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); > > /**< > > callback */ > > + enum AVPixelFormat out_sw_format; /**< Real output format > > */ > > + mfxVideoParam vpp_param; > > + mfxFrameInfo *frame_infos; /**< frame info for each > > input */ > > + > > + /** members related to the input/output surface */ > > + int in_mem_mode; > > + int out_mem_mode; > > + QSVFrame *in_frame_list; > > + QSVFrame *out_frame_list; > > + int nb_surface_ptrs_in; > > + int nb_surface_ptrs_out; > > + mfxFrameSurface1 **surface_ptrs_in; > > + mfxFrameSurface1 **surface_ptrs_out; > > + > > + /** MFXVPP extern parameters */ > > + mfxExtOpaqueSurfaceAlloc opaque_alloc; > > + mfxExtBuffer **ext_buffers; > > + int nb_ext_buffers; > > + > > + int got_frame; > > + int async_depth; > > + int eof; > > + /** order with frame_out, sync */ > > + AVFifoBuffer *async_fifo; > > +} QSVVPPContext; > > > > typedef struct QSVVPPCrop { > > int in_idx; ///< Input index > > @@ -60,6 +98,8 @@ typedef struct QSVVPPParam { > > /* Crop information for each input, if needed */ > > int num_crop; > > QSVVPPCrop *crop; > > + > > + int async_depth; > > } QSVVPPParam; > > > > /* create and initialize the QSV session */ > > diff --git a/libavfilter/vf_deinterlace_qsv.c > > b/libavfilter/vf_deinterlace_qsv.c > > index 89a282f99e..a620567de2 100644 > > --- a/libavfilter/vf_deinterlace_qsv.c > > +++ b/libavfilter/vf_deinterlace_qsv.c > > @@ -47,14 +47,6 @@ enum { > > QSVDEINT_MORE_INPUT, > > }; > > > > -typedef struct QSVFrame { > > - AVFrame *frame; > > - mfxFrameSurface1 surface; > > - int used; > > - > > - struct QSVFrame *next; > > -} QSVFrame; > > - > > typedef struct QSVDeintContext { > > const AVClass *class; > > > > diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c > > index 5d57707455..83bdf1276c 100644 > > --- a/libavfilter/vf_vpp_qsv.c > > +++ b/libavfilter/vf_vpp_qsv.c > > @@ -32,6 
+32,7 @@ > > #include "formats.h" > > #include "internal.h" > > #include "avfilter.h" > > +#include "filters.h" > > #include "libavcodec/avcodec.h" > > #include "libavformat/avformat.h" > > > > @@ -93,6 +94,9 @@ typedef struct VPPContext{ > > char *cx, *cy, *cw, *ch; > > char *ow, *oh; > > char *output_format_str; > > + > > + int async_depth; > > + int eof; > > } VPPContext; > > > > static const AVOption options[] = { > > @@ -128,6 +132,7 @@ static const AVOption options[] = { > > { "h", "Output video height", OFFSET(oh), > > AV_OPT_TYPE_STRING, { > > .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, > > { "height", "Output video height", OFFSET(oh), > > AV_OPT_TYPE_STRING, { > > .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, > > { "format", "Output pixel format", OFFSET(output_format_str), > > AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, > > + { "async_depth", "Internal parallelization depth, the higher > > the value > > the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, { > > .i64 = > > VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS }, > > > > { NULL } > > }; > > @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink) > > param.filter_frame = NULL; > > param.num_ext_buf = 0; > > param.ext_buf = ext_buf; > > + param.async_depth = vpp->async_depth; > > > > if (inlink->format == AV_PIX_FMT_QSV) { > > if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx- > > >data) > > @@ -467,23 +473,64 @@ static int config_output(AVFilterLink > > *outlink) > > return 0; > > } > > > > -static int filter_frame(AVFilterLink *inlink, AVFrame *picref) > > +static int activate(AVFilterContext *ctx) > > { > > - int ret = 0; > > - AVFilterContext *ctx = inlink->dst; > > - VPPContext *vpp = inlink->dst->priv; > > - AVFilterLink *outlink = ctx->outputs[0]; > > - > > - if (vpp->qsv) { > > - ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); > > - av_frame_free(&picref); > > + AVFilterLink *inlink = ctx->inputs[0]; > > + AVFilterLink *outlink = 
ctx->outputs[0]; > > + VPPContext *s =ctx->priv; > > + QSVVPPContext *qsv = s->qsv; > > + AVFrame *in = NULL; > > + int ret, status; > > + int64_t pts; > > + > > + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); > > + > > + if (!s->eof) { > > + ret = ff_inlink_consume_frame(inlink, &in); > > + if (ret < 0) > > + return ret; > > + > > + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { > > + if (status == AVERROR_EOF) { > > + s->eof = 1; > > + } > > + } > > + } > > + > > + if (qsv) { > > + if (in || s->eof) { > > + qsv->eof = s->eof; > > + ret = ff_qsvvpp_filter_frame(qsv, inlink, in); > > + av_frame_free(&in); > > + > > + if (s->eof) { > > + ff_outlink_set_status(outlink, status, pts); > > + return 0; > > + } > > + > > + if (qsv->got_frame) { > > + qsv->got_frame = 0; > > + return ret; > > + } > > + } > > } else { > > - if (picref->pts != AV_NOPTS_VALUE) > > - picref->pts = av_rescale_q(picref->pts, inlink- > > >time_base, > > outlink->time_base); > > - ret = ff_filter_frame(outlink, picref); > > + if (in) { > > + if (in->pts != AV_NOPTS_VALUE) > > + in->pts = av_rescale_q(in->pts, inlink->time_base, > > outlink- > > > time_base); > > > > + > > + ret = ff_filter_frame(outlink, in); > > + return ret; > > + } > > } > > > > - return ret; > > + if (s->eof) { > > + ff_outlink_set_status(outlink, status, pts); > > + return 0; > > + } else { > > + FF_FILTER_FORWARD_WANTED(outlink, inlink); > > + } > > + > > + return FFERROR_NOT_READY; > > } > > > > static int query_formats(AVFilterContext *ctx) > > @@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = { > > .name = "default", > > .type = AVMEDIA_TYPE_VIDEO, > > .config_props = config_input, > > - .filter_frame = filter_frame, > > }, > > { NULL } > > }; > > @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = { > > .uninit = vpp_uninit, > > .inputs = vpp_inputs, > > .outputs = vpp_outputs, > > + .activate = activate, > > .priv_class = &vpp_class, > > .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, > > };
diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index f216b3f248..2e824e67e7 100644 --- a/libavfilter/qsvvpp.c +++ b/libavfilter/qsvvpp.c @@ -27,6 +27,7 @@ #include "libavutil/hwcontext_qsv.h" #include "libavutil/time.h" #include "libavutil/pixdesc.h" +#include "libavutil/fifo.h" #include "internal.h" #include "qsvvpp.h" @@ -37,37 +38,6 @@ #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME) #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY) -typedef struct QSVFrame { - AVFrame *frame; - mfxFrameSurface1 *surface; - mfxFrameSurface1 surface_internal; /* for system memory */ - struct QSVFrame *next; -} QSVFrame; - -/* abstract struct for all QSV filters */ -struct QSVVPPContext { - mfxSession session; - int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback */ - enum AVPixelFormat out_sw_format; /* Real output format */ - mfxVideoParam vpp_param; - mfxFrameInfo *frame_infos; /* frame info for each input */ - - /* members related to the input/output surface */ - int in_mem_mode; - int out_mem_mode; - QSVFrame *in_frame_list; - QSVFrame *out_frame_list; - int nb_surface_ptrs_in; - int nb_surface_ptrs_out; - mfxFrameSurface1 **surface_ptrs_in; - mfxFrameSurface1 **surface_ptrs_out; - - /* MFXVPP extern parameters */ - mfxExtOpaqueSurfaceAlloc opaque_alloc; - mfxExtBuffer **ext_buffers; - int nb_ext_buffers; -}; - static const mfxHandleType handle_types[] = { MFX_HANDLE_VA_DISPLAY, MFX_HANDLE_D3D9_DEVICE_MANAGER, @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, AVFilterLink *link) static void clear_unused_frames(QSVFrame *list) { while (list) { - if (list->surface && !list->surface->Data.Locked) { - list->surface = NULL; + if (list->used && !list->queued && !list->surface.Data.Locked) { av_frame_free(&list->frame); + list->used = 0; } list = list->next; } @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame **list) QSVFrame *out = *list; for (; out; out = out->next) { - if 
(!out->surface) + if (!out->used) { + out->used = 1; break; + } } if (!out) { @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame **list) av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n"); return NULL; } + out->used = 1; out->next = *list; *list = out; } @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p return NULL; } qsv_frame->frame = av_frame_clone(picref); - qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3]; + qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3]; } else { /* make a copy if the input is not padded as libmfx requires */ if (picref->height & 31 || picref->linesize[0] & 31) { @@ -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p qsv_frame->frame = av_frame_clone(picref); if (map_frame_to_surface(qsv_frame->frame, - &qsv_frame->surface_internal) < 0) { + &qsv_frame->surface) < 0) { av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n"); return NULL; } - qsv_frame->surface = &qsv_frame->surface_internal; } - qsv_frame->surface->Info = s->frame_infos[FF_INLINK_IDX(inlink)]; - qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts, + qsv_frame->surface.Info = s->frame_infos[FF_INLINK_IDX(inlink)]; + qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts, inlink->time_base, default_tb); - qsv_frame->surface->Info.PicStruct = + qsv_frame->surface.Info.PicStruct = !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE : (qsv_frame->frame->top_field_first ? 
MFX_PICSTRUCT_FIELD_TFF : MFX_PICSTRUCT_FIELD_BFF); if (qsv_frame->frame->repeat_pict == 1) - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED; else if (qsv_frame->frame->repeat_pict == 2) - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING; else if (qsv_frame->frame->repeat_pict == 4) - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING; return qsv_frame; } @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink) return NULL; } - out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3]; + out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3]; } else { /* Get a frame with aligned dimensions. * Libmfx need system memory being 128x64 aligned */ @@ -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink) out_frame->frame->height = outlink->h; ret = map_frame_to_surface(out_frame->frame, - &out_frame->surface_internal); + &out_frame->surface); if (ret < 0) return NULL; - - out_frame->surface = &out_frame->surface_internal; } - out_frame->surface->Info = s->vpp_param.vpp.Out; + out_frame->surface.Info = s->vpp_param.vpp.Out; return out_frame; } @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s) return 0; } +static unsigned int qsv_fifo_item_size(void) +{ + return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*); +} + +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo) +{ + return av_fifo_size(fifo)/qsv_fifo_item_size(); +} + int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param) { int i; @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *p s->vpp_param.ExtParam = param->ext_buf; } - s->vpp_param.AsyncDepth = 
1; + s->got_frame = 0; + + /** keep fifo size at least 1. Even when async_depth is 0, fifo is used. */ + s->async_fifo = av_fifo_alloc((param->async_depth + 1) * qsv_fifo_item_size()); + s->async_depth = param->async_depth; + if (!s->async_fifo) { + ret = AVERROR(ENOMEM); + goto failed; + } + + s->vpp_param.AsyncDepth = param->async_depth; if (IS_SYSTEM_MEMORY(s->in_mem_mode)) s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY; @@ -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp) av_freep(&s->surface_ptrs_out); av_freep(&s->ext_buffers); av_freep(&s->frame_infos); + av_fifo_free(s->async_fifo); av_freep(vpp); return 0; @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr AVFilterContext *ctx = inlink->dst; AVFilterLink *outlink = ctx->outputs[0]; mfxSyncPoint sync; - QSVFrame *in_frame, *out_frame; + QSVFrame *in_frame, *out_frame, *tmp; int ret, filter_ret; + while (s->eof && qsv_fifo_size(s->async_fifo)) { + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL); + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL); + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); + + filter_ret = s->filter_frame(outlink, tmp->frame); + if (filter_ret < 0) { + av_frame_free(&tmp->frame); + ret = filter_ret; + break; + } + tmp->queued = 0; + s->got_frame = 1; + tmp->frame = NULL; + }; + + if (!picref) + return 0; + in_frame = submit_frame(s, inlink, picref); if (!in_frame) { av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input[%d]\n", @@ -821,8 +832,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr } do { - ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface, - out_frame->surface, NULL, &sync); + ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame->surface, + &out_frame->surface, NULL, &sync); if (ret == MFX_WRN_DEVICE_BUSY) av_usleep(500); } while (ret == MFX_WRN_DEVICE_BUSY); @@ 
-833,20 +844,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr ret = AVERROR(EAGAIN); break; } + out_frame->frame->pts = av_rescale_q(out_frame->surface.Data.TimeStamp, + default_tb, outlink->time_base); - if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) - av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); + out_frame->queued = 1; + av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame), NULL); + av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL); - out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp, - default_tb, outlink->time_base); - filter_ret = s->filter_frame(outlink, out_frame->frame); - if (filter_ret < 0) { - av_frame_free(&out_frame->frame); - ret = filter_ret; - break; + if (qsv_fifo_size(s->async_fifo) > s->async_depth) { + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL); + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL); + + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0) + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n"); + + filter_ret = s->filter_frame(outlink, tmp->frame); + if (filter_ret < 0) { + av_frame_free(&tmp->frame); + ret = filter_ret; + break; + } + + tmp->queued = 0; + s->got_frame = 1; + tmp->frame = NULL; } - out_frame->frame = NULL; } while(ret == MFX_ERR_MORE_SURFACE); return ret; diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h index b4baeedf9e..48c8ffc2d2 100644 --- a/libavfilter/qsvvpp.h +++ b/libavfilter/qsvvpp.h @@ -27,6 +27,7 @@ #include <mfx/mfxvideo.h> #include "avfilter.h" +#include "libavutil/fifo.h" #define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst->input_pads)) #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads)) @@ -39,7 +40,44 @@ ((MFX_VERSION.Major > (MAJOR)) || \ (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR))) -typedef struct QSVVPPContext QSVVPPContext; +#define VPP_ASYNC_DEPTH_DEFAULT 1 + +typedef struct QSVFrame { + AVFrame 
*frame; + mfxFrameSurface1 surface; + struct QSVFrame *next; + int queued; + int used; +} QSVFrame; + +typedef struct QSVVPPContext { + mfxSession session; + int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**< callback */ + enum AVPixelFormat out_sw_format; /**< Real output format */ + mfxVideoParam vpp_param; + mfxFrameInfo *frame_infos; /**< frame info for each input */ + + /** members related to the input/output surface */ + int in_mem_mode; + int out_mem_mode; + QSVFrame *in_frame_list; + QSVFrame *out_frame_list; + int nb_surface_ptrs_in; + int nb_surface_ptrs_out; + mfxFrameSurface1 **surface_ptrs_in; + mfxFrameSurface1 **surface_ptrs_out; + + /** MFXVPP extern parameters */ + mfxExtOpaqueSurfaceAlloc opaque_alloc; + mfxExtBuffer **ext_buffers; + int nb_ext_buffers; + + int got_frame; + int async_depth; + int eof; + /** order with frame_out, sync */ + AVFifoBuffer *async_fifo; +} QSVVPPContext; typedef struct QSVVPPCrop { int in_idx; ///< Input index @@ -60,6 +98,8 @@ typedef struct QSVVPPParam { /* Crop information for each input, if needed */ int num_crop; QSVVPPCrop *crop; + + int async_depth; } QSVVPPParam; /* create and initialize the QSV session */ diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c index 89a282f99e..a620567de2 100644 --- a/libavfilter/vf_deinterlace_qsv.c +++ b/libavfilter/vf_deinterlace_qsv.c @@ -47,14 +47,6 @@ enum { QSVDEINT_MORE_INPUT, }; -typedef struct QSVFrame { - AVFrame *frame; - mfxFrameSurface1 surface; - int used; - - struct QSVFrame *next; -} QSVFrame; - typedef struct QSVDeintContext { const AVClass *class; diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c index 5d57707455..83bdf1276c 100644 --- a/libavfilter/vf_vpp_qsv.c +++ b/libavfilter/vf_vpp_qsv.c @@ -32,6 +32,7 @@ #include "formats.h" #include "internal.h" #include "avfilter.h" +#include "filters.h" #include "libavcodec/avcodec.h" #include "libavformat/avformat.h" @@ -93,6 +94,9 @@ typedef struct VPPContext{ 
char *cx, *cy, *cw, *ch; char *ow, *oh; char *output_format_str; + + int async_depth; + int eof; } VPPContext; static const AVOption options[] = { @@ -128,6 +132,7 @@ static const AVOption options[] = { { "h", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, { "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS }, { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, + { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS }, { NULL } }; @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink) param.filter_frame = NULL; param.num_ext_buf = 0; param.ext_buf = ext_buf; + param.async_depth = vpp->async_depth; if (inlink->format == AV_PIX_FMT_QSV) { if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data) @@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink) return 0; } -static int filter_frame(AVFilterLink *inlink, AVFrame *picref) +static int activate(AVFilterContext *ctx) { - int ret = 0; - AVFilterContext *ctx = inlink->dst; - VPPContext *vpp = inlink->dst->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - if (vpp->qsv) { - ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); - av_frame_free(&picref); + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + VPPContext *s =ctx->priv; + QSVVPPContext *qsv = s->qsv; + AVFrame *in = NULL; + int ret, status; + int64_t pts; + + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + if (!s->eof) { + ret = ff_inlink_consume_frame(inlink, &in); + if (ret < 0) + return ret; + + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { + if (status == AVERROR_EOF) { + s->eof = 1; + } + } + } + + if (qsv) { + if (in || s->eof) { + qsv->eof = s->eof; + ret 
= ff_qsvvpp_filter_frame(qsv, inlink, in); + av_frame_free(&in); + + if (s->eof) { + ff_outlink_set_status(outlink, status, pts); + return 0; + } + + if (qsv->got_frame) { + qsv->got_frame = 0; + return ret; + } + } } else { - if (picref->pts != AV_NOPTS_VALUE) - picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base); - ret = ff_filter_frame(outlink, picref); + if (in) { + if (in->pts != AV_NOPTS_VALUE) + in->pts = av_rescale_q(in->pts, inlink->time_base, outlink->time_base); + + ret = ff_filter_frame(outlink, in); + return ret; + } } - return ret; + if (s->eof) { + ff_outlink_set_status(outlink, status, pts); + return 0; + } else { + FF_FILTER_FORWARD_WANTED(outlink, inlink); + } + + return FFERROR_NOT_READY; } static int query_formats(AVFilterContext *ctx) @@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .config_props = config_input, - .filter_frame = filter_frame, }, { NULL } }; @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = { .uninit = vpp_uninit, .inputs = vpp_inputs, .outputs = vpp_outputs, + .activate = activate, .priv_class = &vpp_class, .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, };
Async depth allows the QSV filter to cache a few frames, and avoids forcing the filter task to switch and finish frame by frame. This change improves performance in some multi-task cases, for example 1:N transcode (decode + vpp + encode) with all QSV plugins. Signed-off-by: Fei Wang <fei.w.wang@intel.com> --- libavfilter/qsvvpp.c | 147 ++++++++++++++++++------------- libavfilter/qsvvpp.h | 42 ++++++++- libavfilter/vf_deinterlace_qsv.c | 8 -- libavfilter/vf_vpp_qsv.c | 75 +++++++++++++--- 4 files changed, 187 insertions(+), 85 deletions(-)