From patchwork Tue Mar 26 05:38:24 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Fu, Linjie" X-Patchwork-Id: 12445 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 67CBE4473ED for ; Tue, 26 Mar 2019 07:38:31 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 3EDCD68A0C9; Tue, 26 Mar 2019 07:38:31 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id C8C4E68094D for ; Tue, 26 Mar 2019 07:38:23 +0200 (EET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga007.jf.intel.com ([10.7.209.58]) by fmsmga106.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 25 Mar 2019 22:38:21 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,271,1549958400"; d="scan'208";a="125880236" Received: from media_lj_kbl.sh.intel.com ([10.239.13.101]) by orsmga007.jf.intel.com with ESMTP; 25 Mar 2019 22:38:20 -0700 From: Linjie Fu To: ffmpeg-devel@ffmpeg.org Date: Tue, 26 Mar 2019 13:38:24 +0800 Message-Id: <20190326053824.24220-1-linjie.fu@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH 1/3] lavc/qsvdec: add support for gpu_copy X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: ChaoX A Liu , Linjie Fu MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Add support for GPU copy when QSV decoders work in system memory mode. 
However, memory must be contiguous and aligned to 128x64 to enable this feature (first introduced in FFmpeg 3.3.1). GPUCopy = MFX_GPUCOPY_ON leads to a performance improvement of up to 10x. CMD: ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv -gpu_copy on -i input.h264 -pix_fmt yuv420p out.yuv Signed-off-by: Linjie Fu Signed-off-by: ChaoX A Liu --- libavcodec/qsv.c | 27 +++++++++++++------- libavcodec/qsv_internal.h | 6 ++--- libavcodec/qsvdec.c | 53 ++++++++++++++++++++++++++++++++++----- libavcodec/qsvdec.h | 2 ++ libavcodec/qsvdec_h2645.c | 10 ++++++++ libavcodec/qsvdec_other.c | 5 ++++ libavcodec/qsvenc.c | 7 +++--- 7 files changed, 89 insertions(+), 21 deletions(-) diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index bb0d79588c..40e6c677cb 100644 --- a/libavcodec/qsv.c +++ b/libavcodec/qsv.c @@ -277,15 +277,19 @@ load_plugin_fail: } int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession *session, - const char *load_plugins) + const char *load_plugins, int gpu_copy) { - mfxIMPL impl = MFX_IMPL_AUTO_ANY; - mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } }; + mfxIMPL impl = MFX_IMPL_AUTO_ANY; + mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } }; + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; const char *desc; int ret; - ret = MFXInit(impl, &ver, session); + init_par.GPUCopy = gpu_copy; + init_par.Implementation = impl; + init_par.Version = ver; + ret = MFXInitEx(init_par, session); if (ret < 0) return ff_qsv_print_error(avctx, ret, "Error initializing an internal MFX session"); @@ -571,7 +575,8 @@ static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) } int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, - AVBufferRef *device_ref, const char *load_plugins) + AVBufferRef *device_ref, const char *load_plugins, + int gpu_copy) { static const mfxHandleType handle_types[] = { MFX_HANDLE_VA_DISPLAY, @@ -581,11 +586,12 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, 
mfxSession *psession, AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)device_ref->data; AVQSVDeviceContext *device_hwctx = device_ctx->hwctx; mfxSession parent_session = device_hwctx->session; + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; + mfxHDL handle = NULL; mfxSession session; mfxVersion ver; mfxIMPL impl; - mfxHDL handle = NULL; mfxHandleType handle_type; mfxStatus err; @@ -611,7 +617,10 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, "from the session\n"); } - err = MFXInit(impl, &ver, &session); + init_par.GPUCopy = gpu_copy; + init_par.Implementation = impl; + init_par.Version = ver; + err = MFXInitEx(init_par, &session); if (err != MFX_ERR_NONE) return ff_qsv_print_error(avctx, err, "Error initializing a child MFX session"); @@ -642,7 +651,7 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession, QSVFramesContext *qsv_frames_ctx, - const char *load_plugins, int opaque) + const char *load_plugins, int opaque, int gpu_copy) { mfxFrameAllocator frame_allocator = { .pthis = qsv_frames_ctx, @@ -662,7 +671,7 @@ int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession, int ret; ret = ff_qsv_init_session_device(avctx, &session, - frames_ctx->device_ref, load_plugins); + frames_ctx->device_ref, load_plugins, gpu_copy); if (ret < 0) return ret; diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h index 394c558883..8be6c3757c 100644 --- a/libavcodec/qsv_internal.h +++ b/libavcodec/qsv_internal.h @@ -95,14 +95,14 @@ int ff_qsv_map_pixfmt(enum AVPixelFormat format, uint32_t *fourcc); enum AVPictureType ff_qsv_map_pictype(int mfx_pic_type); int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession *session, - const char *load_plugins); + const char *load_plugins, int gpu_copy); int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, - AVBufferRef *device_ref, const char 
*load_plugins); + AVBufferRef *device_ref, const char *load_plugins, int gpu_copy); int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *session, QSVFramesContext *qsv_frames_ctx, - const char *load_plugins, int opaque); + const char *load_plugins, int opaque, int gpu_copy); int ff_qsv_find_surface_idx(QSVFramesContext *ctx, QSVFrame *frame); diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index 4a0be811fb..5dd2b3834b 100644 --- a/libavcodec/qsvdec.c +++ b/libavcodec/qsvdec.c @@ -34,9 +34,11 @@ #include "libavutil/pixdesc.h" #include "libavutil/pixfmt.h" #include "libavutil/time.h" +#include "libavutil/imgutils.h" #include "avcodec.h" #include "internal.h" +#include "decode.h" #include "qsv.h" #include "qsv_internal.h" #include "qsvdec.h" @@ -54,6 +56,31 @@ const AVCodecHWConfigInternal *ff_qsv_hw_configs[] = { NULL }; +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, AVFrame *frame, AVBufferPool *pool) +{ + int ret = 0; + + ff_decode_frame_props(avctx, frame); + + frame->width = avctx->width; + frame->height = avctx->height; + frame->linesize[0] = FFALIGN(avctx->width, 128); + frame->linesize[1] = frame->linesize[0]; + frame->buf[0] = av_buffer_pool_get(pool); + if (!frame->buf[0]) + return AVERROR(ENOMEM); + + frame->data[0] = frame->buf[0]->data; + frame->data[1] = frame->data[0] + + frame->linesize[0] * FFALIGN(avctx->height, 64); + + ret = ff_attach_decode_data(frame); + if (ret < 0) + return ret; + + return 0; +} + static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession session, AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref) { @@ -74,7 +101,8 @@ static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses ret = ff_qsv_init_session_frames(avctx, &q->internal_session, &q->frames_ctx, q->load_plugins, - q->iopattern == MFX_IOPATTERN_OUT_OPAQUE_MEMORY); + q->iopattern == MFX_IOPATTERN_OUT_OPAQUE_MEMORY, + q->gpu_copy); if (ret < 0) { av_buffer_unref(&q->frames_ctx.hw_frames_ctx); 
return ret; @@ -88,7 +116,7 @@ static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses } ret = ff_qsv_init_session_device(avctx, &q->internal_session, - hw_device_ref, q->load_plugins); + hw_device_ref, q->load_plugins, q->gpu_copy); if (ret < 0) return ret; @@ -96,7 +124,7 @@ static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses } else { if (!q->internal_session) { ret = ff_qsv_init_internal_session(avctx, &q->internal_session, - q->load_plugins); + q->load_plugins, q->gpu_copy); if (ret < 0) return ret; } @@ -213,6 +241,12 @@ static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q) q->frame_info = param.mfx.FrameInfo; + if (avctx->pix_fmt != AV_PIX_FMT_QSV) + q->pool = av_buffer_pool_init(av_image_get_buffer_size(avctx->pix_fmt, + FFALIGN(avctx->width, 128), + FFALIGN(avctx->height, 64), 1), + av_buffer_allocz); + return 0; } @@ -220,9 +254,15 @@ static int alloc_frame(AVCodecContext *avctx, QSVContext *q, QSVFrame *frame) { int ret; - ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF); - if (ret < 0) - return ret; + if (!q->pool) { + ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF); + if (ret < 0) + return ret; + } else { + ret = ff_qsv_get_continuous_buffer(avctx, frame->frame, q->pool); + if (ret < 0) + return ret; + } if (frame->frame->format == AV_PIX_FMT_QSV) { frame->surface = *(mfxFrameSurface1*)frame->frame->data[3]; @@ -484,6 +524,7 @@ int ff_qsv_decode_close(QSVContext *q) av_buffer_unref(&q->frames_ctx.hw_frames_ctx); av_buffer_unref(&q->frames_ctx.mids_buf); + av_buffer_pool_uninit(&q->pool); return 0; } diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h index 111536caba..43ea03867e 100644 --- a/libavcodec/qsvdec.h +++ b/libavcodec/qsvdec.h @@ -62,10 +62,12 @@ typedef struct QSVContext { enum AVPixelFormat orig_pix_fmt; uint32_t fourcc; mfxFrameInfo frame_info; + AVBufferPool *pool; // options set by the caller int async_depth; int iopattern; + int gpu_copy; 
char *load_plugins; diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c index 9b49f5506e..3d1f1cbfac 100644 --- a/libavcodec/qsvdec_h2645.c +++ b/libavcodec/qsvdec_h2645.c @@ -192,6 +192,11 @@ static const AVOption hevc_options[] = { { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load in an internal session", OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD }, + + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"}, + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, 0, 0, VD, "gpu_copy"}, + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, 0, 0, VD, "gpu_copy"}, { NULL }, }; @@ -227,6 +232,11 @@ AVCodec ff_hevc_qsv_decoder = { #if CONFIG_H264_QSV_DECODER static const AVOption options[] = { { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD }, + + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"}, + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, 0, 0, VD, "gpu_copy"}, + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, 0, 0, VD, "gpu_copy"}, { NULL }, }; diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c index 03251d2c85..37237180fb 100644 --- a/libavcodec/qsvdec_other.c +++ b/libavcodec/qsvdec_other.c @@ -169,6 +169,11 @@ static void qsv_decode_flush(AVCodecContext *avctx) #define VD AV_OPT_FLAG_VIDEO_PARAM | 
AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD }, + + { "gpu_copy", "A GPU-accelerated memory copy for non-QSV pipelines", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"}, + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, 0, 0, VD, "gpu_copy"}, + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, 0, 0, VD, "gpu_copy"}, { NULL }, }; diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 5aa020d47b..3d008ed527 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -909,7 +909,8 @@ static int qsvenc_init_session(AVCodecContext *avctx, QSVEncContext *q) ret = ff_qsv_init_session_frames(avctx, &q->internal_session, &q->frames_ctx, q->load_plugins, - q->param.IOPattern == MFX_IOPATTERN_IN_OPAQUE_MEMORY); + q->param.IOPattern == MFX_IOPATTERN_IN_OPAQUE_MEMORY, + MFX_GPUCOPY_OFF); if (ret < 0) { av_buffer_unref(&q->frames_ctx.hw_frames_ctx); return ret; @@ -918,14 +919,14 @@ static int qsvenc_init_session(AVCodecContext *avctx, QSVEncContext *q) q->session = q->internal_session; } else if (avctx->hw_device_ctx) { ret = ff_qsv_init_session_device(avctx, &q->internal_session, - avctx->hw_device_ctx, q->load_plugins); + avctx->hw_device_ctx, q->load_plugins, MFX_GPUCOPY_OFF); if (ret < 0) return ret; q->session = q->internal_session; } else { ret = ff_qsv_init_internal_session(avctx, &q->internal_session, - q->load_plugins); + q->load_plugins, MFX_GPUCOPY_OFF); if (ret < 0) return ret;