From patchwork Mon Mar 15 05:10:36 2021
X-Patchwork-Submitter: Fei Wang
X-Patchwork-Id: 26393
From: Fei Wang <fei.w.wang@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Mon, 15 Mar 2021 13:10:36 +0800
Message-Id: <20210315051036.4767-1-fei.w.wang@intel.com>
X-Mailer: git-send-email 2.17.1
Subject: [FFmpeg-devel] [PATCH v2] lavfi/qsvvpp: support async depth

Async depth allows the QSV filters to cache a few frames instead of
forcing the filter task to be submitted and finished frame by frame.
This improves performance in some multi-task cases, for example 1:N
transcoding (decode + vpp + encode) with all QSV plugins.

Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
Change in v2: combine 'used' and 'queued' into a single 'queued' field
in QSVFrame.
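Example command line for the 1:N case mentioned above (illustrative
only, not part of the patch; file names, scaling sizes and the
async_depth value are arbitrary):

    ffmpeg -hwaccel qsv -hwaccel_output_format qsv -c:v h264_qsv -i input.mp4 \
           -vf vpp_qsv=w=1280:h=720:async_depth=4 -c:v h264_qsv out_720p.mp4 \
           -vf vpp_qsv=w=640:h=360:async_depth=4  -c:v h264_qsv out_360p.mp4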
 libavfilter/qsvvpp.c             | 153 ++++++++++++++++++-------------
 libavfilter/qsvvpp.h             |  41 ++++++++-
 libavfilter/vf_deinterlace_qsv.c |  14 +--
 libavfilter/vf_vpp_qsv.c         |  75 ++++++++++++---
 4 files changed, 193 insertions(+), 90 deletions(-)

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index f216b3f248..e7c7a12cfa 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -27,6 +27,7 @@
 #include "libavutil/hwcontext_qsv.h"
 #include "libavutil/time.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/fifo.h"
 
 #include "internal.h"
 #include "qsvvpp.h"
@@ -37,37 +38,6 @@
 #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
 #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
 
-typedef struct QSVFrame {
-    AVFrame          *frame;
-    mfxFrameSurface1 *surface;
-    mfxFrameSurface1  surface_internal;  /* for system memory */
-    struct QSVFrame  *next;
-} QSVFrame;
-
-/* abstract struct for all QSV filters */
-struct QSVVPPContext {
-    mfxSession          session;
-    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback */
-    enum AVPixelFormat  out_sw_format;   /* Real output format */
-    mfxVideoParam       vpp_param;
-    mfxFrameInfo       *frame_infos;     /* frame info for each input */
-
-    /* members related to the input/output surface */
-    int                 in_mem_mode;
-    int                 out_mem_mode;
-    QSVFrame           *in_frame_list;
-    QSVFrame           *out_frame_list;
-    int                 nb_surface_ptrs_in;
-    int                 nb_surface_ptrs_out;
-    mfxFrameSurface1  **surface_ptrs_in;
-    mfxFrameSurface1  **surface_ptrs_out;
-
-    /* MFXVPP extern parameters */
-    mfxExtOpaqueSurfaceAlloc opaque_alloc;
-    mfxExtBuffer      **ext_buffers;
-    int                 nb_ext_buffers;
-};
-
 static const mfxHandleType handle_types[] = {
     MFX_HANDLE_VA_DISPLAY,
     MFX_HANDLE_D3D9_DEVICE_MANAGER,
@@ -336,9 +306,11 @@ static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, AVFilterLink *link)
 static void clear_unused_frames(QSVFrame *list)
 {
     while (list) {
-        if (list->surface && !list->surface->Data.Locked) {
-            list->surface = NULL;
+        /* list->queued==1 means the frame is not cached in VPP
+         * process any more, it can be released to pool. */
+        if ((list->queued == 1) && !list->surface.Data.Locked) {
             av_frame_free(&list->frame);
+            list->queued = 0;
         }
         list = list->next;
     }
@@ -361,8 +333,10 @@ static QSVFrame *get_free_frame(QSVFrame **list)
     QSVFrame *out = *list;
 
     for (; out; out = out->next) {
-        if (!out->surface)
+        if (!out->queued) {
+            out->queued = 1;
             break;
+        }
     }
 
     if (!out) {
@@ -371,8 +345,9 @@ static QSVFrame *get_free_frame(QSVFrame **list)
             av_log(NULL, AV_LOG_ERROR, "Can't alloc new output frame.\n");
             return NULL;
         }
-        out->next = *list;
-        *list     = out;
+        out->queued = 1;
+        out->next   = *list;
+        *list       = out;
     }
 
     return out;
@@ -402,7 +377,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
             return NULL;
         }
         qsv_frame->frame   = av_frame_clone(picref);
-        qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
+        qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3];
     } else {
         /* make a copy if the input is not padded as libmfx requires */
         if (picref->height & 31 || picref->linesize[0] & 31) {
@@ -425,27 +400,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
             qsv_frame->frame = av_frame_clone(picref);
 
         if (map_frame_to_surface(qsv_frame->frame,
-                                 &qsv_frame->surface_internal) < 0) {
+                                 &qsv_frame->surface) < 0) {
             av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
             return NULL;
         }
-        qsv_frame->surface = &qsv_frame->surface_internal;
     }
 
-    qsv_frame->surface->Info           = s->frame_infos[FF_INLINK_IDX(inlink)];
-    qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
+    qsv_frame->surface.Info           = s->frame_infos[FF_INLINK_IDX(inlink)];
+    qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
                                                       inlink->time_base, default_tb);
 
-    qsv_frame->surface->Info.PicStruct =
+    qsv_frame->surface.Info.PicStruct =
             !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
             (qsv_frame->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
                                                  MFX_PICSTRUCT_FIELD_BFF);
     if (qsv_frame->frame->repeat_pict == 1)
-        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
+        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
     else if (qsv_frame->frame->repeat_pict == 2)
-        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
+        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
     else if (qsv_frame->frame->repeat_pict == 4)
-        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
+        qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
 
     return qsv_frame;
 }
@@ -476,7 +450,7 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
             return NULL;
         }
 
-        out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3];
+        out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3];
     } else {
         /* Get a frame with aligned dimensions.
          * Libmfx need system memory being 128x64 aligned */
@@ -490,14 +464,12 @@
         out_frame->frame->height = outlink->h;
 
         ret = map_frame_to_surface(out_frame->frame,
-                                   &out_frame->surface_internal);
+                                   &out_frame->surface);
         if (ret < 0)
             return NULL;
-
-        out_frame->surface = &out_frame->surface_internal;
     }
 
-    out_frame->surface->Info = s->vpp_param.vpp.Out;
+    out_frame->surface.Info = s->vpp_param.vpp.Out;
 
     return out_frame;
 }
@@ -666,6 +638,16 @@ static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
     return 0;
 }
 
+static unsigned int qsv_fifo_item_size(void)
+{
+    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
+}
+
+static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
+{
+    return av_fifo_size(fifo)/qsv_fifo_item_size();
+}
+
 int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param)
 {
     int i;
@@ -738,7 +720,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *p
         s->vpp_param.ExtParam    = param->ext_buf;
     }
 
-    s->vpp_param.AsyncDepth = 1;
+    s->got_frame = 0;
+
+    /** keep fifo size at least 1. Even when async_depth is 0, fifo is used. */
+    s->async_fifo  = av_fifo_alloc((param->async_depth + 1) * qsv_fifo_item_size());
+    s->async_depth = param->async_depth;
+    if (!s->async_fifo) {
+        ret = AVERROR(ENOMEM);
+        goto failed;
+    }
+
+    s->vpp_param.AsyncDepth = param->async_depth;
 
     if (IS_SYSTEM_MEMORY(s->in_mem_mode))
         s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
@@ -793,6 +785,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp)
     av_freep(&s->surface_ptrs_out);
     av_freep(&s->ext_buffers);
     av_freep(&s->frame_infos);
+    av_fifo_free(s->async_fifo);
     av_freep(vpp);
 
     return 0;
@@ -803,9 +796,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
     AVFilterContext  *ctx     = inlink->dst;
     AVFilterLink     *outlink = ctx->outputs[0];
     mfxSyncPoint      sync;
-    QSVFrame         *in_frame, *out_frame;
+    QSVFrame         *in_frame, *out_frame, *tmp;
     int               ret, filter_ret;
 
+    while (s->eof && qsv_fifo_size(s->async_fifo)) {
+        av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
+        av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
+        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
+            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+
+        filter_ret = s->filter_frame(outlink, tmp->frame);
+        if (filter_ret < 0) {
+            av_frame_free(&tmp->frame);
+            ret = filter_ret;
+            break;
+        }
+        tmp->queued--;
+        s->got_frame = 1;
+        tmp->frame = NULL;
+    };
+
+    if (!picref)
+        return 0;
+
     in_frame = submit_frame(s, inlink, picref);
     if (!in_frame) {
         av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input[%d]\n",
@@ -821,8 +834,8 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
         }
 
         do {
-            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface,
-                                               out_frame->surface, NULL, &sync);
+            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame->surface,
+                                               &out_frame->surface, NULL, &sync);
             if (ret == MFX_WRN_DEVICE_BUSY)
                 av_usleep(500);
         } while (ret == MFX_WRN_DEVICE_BUSY);
@@ -833,20 +846,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
                 ret = AVERROR(EAGAIN);
             break;
         }
+        out_frame->frame->pts = av_rescale_q(out_frame->surface.Data.TimeStamp,
+                                             default_tb, outlink->time_base);
 
-        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
-            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+        out_frame->queued++;
+        av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame), NULL);
+        av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL);
 
-        out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp,
-                                             default_tb, outlink->time_base);
-        filter_ret = s->filter_frame(outlink, out_frame->frame);
-        if (filter_ret < 0) {
-            av_frame_free(&out_frame->frame);
-            ret = filter_ret;
-            break;
+        if (qsv_fifo_size(s->async_fifo) > s->async_depth) {
+            av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
+            av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
+
+            if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
+                av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+
+            filter_ret = s->filter_frame(outlink, tmp->frame);
+            if (filter_ret < 0) {
+                av_frame_free(&tmp->frame);
+                ret = filter_ret;
+                break;
+            }
+
+            tmp->queued--;
+            s->got_frame = 1;
+            tmp->frame = NULL;
         }
-        out_frame->frame = NULL;
     } while(ret == MFX_ERR_MORE_SURFACE);
 
     return ret;
diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
index b4baeedf9e..26be0d8ea8 100644
--- a/libavfilter/qsvvpp.h
+++ b/libavfilter/qsvvpp.h
@@ -27,6 +27,7 @@
 #include <mfx/mfxvideo.h>
 
 #include "avfilter.h"
+#include "libavutil/fifo.h"
 
 #define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst->input_pads))
 #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads))
@@ -39,7 +40,43 @@
     ((MFX_VERSION.Major > (MAJOR)) ||                                  \
     (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR)))
 
-typedef struct QSVVPPContext QSVVPPContext;
+#define VPP_ASYNC_DEPTH_DEFAULT     1
+
+typedef struct QSVFrame {
+    AVFrame          *frame;
+    mfxFrameSurface1  surface;
+    struct QSVFrame  *next;
+    int               queued;
+} QSVFrame;
+
+typedef struct QSVVPPContext {
+    mfxSession          session;
+    int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**< callback */
+    enum AVPixelFormat  out_sw_format;   /**< Real output format */
+    mfxVideoParam       vpp_param;
+    mfxFrameInfo       *frame_infos;     /**< frame info for each input */
+
+    /** members related to the input/output surface */
+    int                 in_mem_mode;
+    int                 out_mem_mode;
+    QSVFrame           *in_frame_list;
+    QSVFrame           *out_frame_list;
+    int                 nb_surface_ptrs_in;
+    int                 nb_surface_ptrs_out;
+    mfxFrameSurface1  **surface_ptrs_in;
+    mfxFrameSurface1  **surface_ptrs_out;
+
+    /** MFXVPP extern parameters */
+    mfxExtOpaqueSurfaceAlloc opaque_alloc;
+    mfxExtBuffer      **ext_buffers;
+    int                 nb_ext_buffers;
+
+    int got_frame;
+    int async_depth;
+    int eof;
+    /** order with frame_out, sync */
+    AVFifoBuffer *async_fifo;
+} QSVVPPContext;
 
 typedef struct QSVVPPCrop {
     int in_idx;        ///< Input index
@@ -60,6 +97,8 @@ typedef struct QSVVPPParam {
     /* Crop information for each input, if needed */
     int num_crop;
     QSVVPPCrop *crop;
+
+    int async_depth;
 } QSVVPPParam;
 
 /* create and initialize the QSV session */
diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c
index 89a282f99e..34feb616ab 100644
--- a/libavfilter/vf_deinterlace_qsv.c
+++ b/libavfilter/vf_deinterlace_qsv.c
@@ -47,14 +47,6 @@ enum {
     QSVDEINT_MORE_INPUT,
 };
 
-typedef struct QSVFrame {
-    AVFrame *frame;
-    mfxFrameSurface1 surface;
-    int used;
-
-    struct QSVFrame *next;
-} QSVFrame;
-
 typedef struct QSVDeintContext {
     const AVClass *class;
 
@@ -376,7 +368,7 @@ static void clear_unused_frames(QSVDeintContext *s)
     while (cur) {
         if (!cur->surface.Data.Locked) {
             av_frame_free(&cur->frame);
-            cur->used = 0;
+            cur->queued = 0;
         }
         cur = cur->next;
     }
@@ -391,7 +383,7 @@ static int get_free_frame(QSVDeintContext *s, QSVFrame **f)
     frame = s->work_frames;
     last  = &s->work_frames;
     while (frame) {
-        if (!frame->used) {
+        if (!frame->queued) {
             *f = frame;
             return 0;
         }
@@ -453,7 +445,7 @@ static int submit_frame(AVFilterContext *ctx, AVFrame *frame,
                                               (AVRational){1, 90000});
 
     *surface = &qf->surface;
-    qf->used = 1;
+    qf->queued = 1;
 
     return 0;
 }
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 5d57707455..83bdf1276c 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -32,6 +32,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "avfilter.h"
+#include "filters.h"
 
 #include "libavcodec/avcodec.h"
 #include "libavformat/avformat.h"
@@ -93,6 +94,9 @@ typedef struct VPPContext{
     char *cx, *cy, *cw, *ch;
     char *ow, *oh;
     char *output_format_str;
+
+    int async_depth;
+    int eof;
 } VPPContext;
 
 static const AVOption options[] = {
@@ -128,6 +132,7 @@ static const AVOption options[] = {
     { "h",      "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
     { "height", "Output video height", OFFSET(oh), AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
     { "format", "Output pixel format", OFFSET(output_format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS },
     { NULL }
 };
 
@@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink)
     param.filter_frame  = NULL;
     param.num_ext_buf   = 0;
     param.ext_buf       = ext_buf;
+    param.async_depth   = vpp->async_depth;
 
     if (inlink->format == AV_PIX_FMT_QSV) {
         if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data)
@@ -467,23 +473,64 @@
     return 0;
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
+static int activate(AVFilterContext *ctx)
 {
-    int              ret = 0;
-    AVFilterContext  *ctx = inlink->dst;
-    VPPContext       *vpp = inlink->dst->priv;
-    AVFilterLink     *outlink = ctx->outputs[0];
-
-    if (vpp->qsv) {
-        ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
-        av_frame_free(&picref);
+    AVFilterLink    *inlink = ctx->inputs[0];
+    AVFilterLink    *outlink = ctx->outputs[0];
+    VPPContext      *s =ctx->priv;
+    QSVVPPContext   *qsv = s->qsv;
+    AVFrame *in = NULL;
+    int ret, status;
+    int64_t pts;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (!s->eof) {
+        ret = ff_inlink_consume_frame(inlink, &in);
+        if (ret < 0)
+            return ret;
+
+        if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+            if (status == AVERROR_EOF) {
+                s->eof = 1;
+            }
+        }
+    }
+
+    if (qsv) {
+        if (in || s->eof) {
+            qsv->eof = s->eof;
+            ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
+            av_frame_free(&in);
+
+            if (s->eof) {
+                ff_outlink_set_status(outlink, status, pts);
+                return 0;
+            }
+
+            if (qsv->got_frame) {
+                qsv->got_frame = 0;
+                return ret;
+            }
+        }
     } else {
-        if (picref->pts != AV_NOPTS_VALUE)
-            picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base);
-        ret = ff_filter_frame(outlink, picref);
+        if (in) {
+            if (in->pts != AV_NOPTS_VALUE)
+                in->pts = av_rescale_q(in->pts, inlink->time_base, outlink->time_base);
+
+            ret = ff_filter_frame(outlink, in);
+            return ret;
+        }
     }
 
-    return ret;
+    if (s->eof) {
+        ff_outlink_set_status(outlink, status, pts);
+        return 0;
+    } else {
+        FF_FILTER_FORWARD_WANTED(outlink, inlink);
+    }
+
+    return FFERROR_NOT_READY;
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
         .config_props  = config_input,
-        .filter_frame  = filter_frame,
     },
     { NULL }
 };
@@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = {
     .uninit        = vpp_uninit,
     .inputs        = vpp_inputs,
     .outputs       = vpp_outputs,
+    .activate      = activate,
     .priv_class    = &vpp_class,
     .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
 };