Message ID | 20190927054634.32553-1-linjie.fu@intel.com |
---|---|
State | Superseded |
Headers | show |
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Linjie Fu > Sent: Friday, September 27, 2019 1:47 PM > To: ffmpeg-devel@ffmpeg.org > Cc: ChaoX A Liu <chaox.a.liu@intel.com>; Fu, Linjie <linjie.fu@intel.com> > Subject: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated memory > copy support > > GPU copy enables or disables GPU accelerated copying between video and > system memory. This may lead to a notable performance improvement. > Memory must be sequent and aligned with 128x64. > (first introduced in FFmpeg 3.3.1) This line should be removed. FFmpeg 3.3.1 mainline never support GPU copy. > > CMD: > ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv > -gpu_copy on -i input.h264 -f null - > or: > ffmpeg -c:v h264_qsv -gpu_copy on -i input.h264 -f null - > > Signed-off-by: Linjie Fu <linjie.fu@intel.com> > Signed-off-by: ChaoX A Liu <chaox.a.liu@intel.com> > --- > Rebased and send again. > > libavcodec/qsv.c | 31 +++++++++++++++++------- > libavcodec/qsv_internal.h | 7 +++--- > libavcodec/qsvdec.c | 50 ++++++++++++++++++++++++++++++++++----- > libavcodec/qsvdec.h | 2 ++ > libavcodec/qsvdec_h2645.c | 10 ++++++++ libavcodec/qsvdec_other.c | 5 ++++ > libavcodec/qsvenc.c | 8 ++++--- > 7 files changed, 92 insertions(+), 21 deletions(-) > > diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index 994c9ebcb0..9e66fbc9da > 100644 > --- a/libavcodec/qsv.c > +++ b/libavcodec/qsv.c > @@ -412,15 +412,19 @@ static int ff_qsv_set_display_handle(AVCodecContext > *avctx, QSVSession *qs) #endif //AVCODEC_QSV_LINUX_SESSION_HANDLE > > int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs, > - const char *load_plugins) > + const char *load_plugins, int > + gpu_copy) > { > - mfxIMPL impl = MFX_IMPL_AUTO_ANY; > - mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } }; > + mfxIMPL impl = MFX_IMPL_AUTO_ANY; > + mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } }; > + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; > 
> const char *desc; > int ret; > > - ret = MFXInit(impl, &ver, &qs->session); > + init_par.GPUCopy = gpu_copy; The GPUCopy field was introduced in API 1.16; it would be better to check for it to avoid compile issues with older API versions. > + init_par.Implementation = impl; > + init_par.Version = ver; > + ret = MFXInitEx(init_par, &qs->session); > if (ret < 0) > return ff_qsv_print_error(avctx, ret, > "Error initializing an internal MFX session"); @@ -712,7 > +716,8 @@ static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid, > mfxHDL *hdl) } > > int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, > - AVBufferRef *device_ref, const char *load_plugins) > + AVBufferRef *device_ref, const char *load_plugins, > + int gpu_copy) > { > static const mfxHandleType handle_types[] = { > MFX_HANDLE_VA_DISPLAY, > @@ -722,11 +727,12 @@ int ff_qsv_init_session_device(AVCodecContext > *avctx, mfxSession *psession, > AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)device_ref- > >data; > AVQSVDeviceContext *device_hwctx = device_ctx->hwctx; > mfxSession parent_session = device_hwctx->session; > + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; > + mfxHDL handle = NULL; > > mfxSession session; > mfxVersion ver; > mfxIMPL impl; > - mfxHDL handle = NULL; > mfxHandleType handle_type; > mfxStatus err; > > @@ -752,7 +758,10 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, > mfxSession *psession, > "from the session\n"); > } > > - err = MFXInit(impl, &ver, &session); > + init_par.GPUCopy = gpu_copy; > + init_par.Implementation = impl; > + init_par.Version = ver; > + err = MFXInitEx(init_par, &session); > if (err != MFX_ERR_NONE) > return ff_qsv_print_error(avctx, err, > "Error initializing a child MFX session"); @@ -783,7 +792,7 > @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession > *psession, > > int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession, > QSVFramesContext *qsv_frames_ctx, > - const char *load_plugins, int opaque) > + 
const char *load_plugins, int opaque, > + int gpu_copy) > { > mfxFrameAllocator frame_allocator = { > .pthis = qsv_frames_ctx, > @@ -802,8 +811,12 @@ int ff_qsv_init_session_frames(AVCodecContext *avctx, > mfxSession *psession, > > int ret; > > + if (gpu_copy == MFX_GPUCOPY_ON) > + av_log(avctx, AV_LOG_WARNING, "GPU-accelerated memory copy " > + "only works in > + MFX_IOPATTERN_OUT_SYSTEM_MEMORY.\n"); This looks weird: 1. The warning log will always be printed if gpu_copy is on, no matter what the iopattern is. So it would be better to do: if (gpu_copy == MFX_GPUCOPY_ON && iopattern != system memory) print a warning. 2. It is only added for ff_qsv_init_session_frames(), but it looks like it should also apply to qsv_init_session(). > ret = ff_qsv_init_session_device(avctx, &session, > - frames_ctx->device_ref, load_plugins); > + frames_ctx->device_ref, > + load_plugins, gpu_copy); > if (ret < 0) > return ret; > > diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h index > 8b44a9b6f4..37559270e5 100644 > --- a/libavcodec/qsv_internal.h > +++ b/libavcodec/qsv_internal.h > @@ -127,16 +127,17 @@ enum AVPictureType ff_qsv_map_pictype(int > mfx_pic_type); enum AVFieldOrder ff_qsv_map_picstruct(int mfx_pic_struct); > > int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs, > - const char *load_plugins); > + const char *load_plugins, int > + gpu_copy); > > int ff_qsv_close_internal_session(QSVSession *qs); > > int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, > - AVBufferRef *device_ref, const char *load_plugins); > + AVBufferRef *device_ref, const char *load_plugins, > + int gpu_copy); > > int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *session, > QSVFramesContext *qsv_frames_ctx, > - const char *load_plugins, int opaque); > + const char *load_plugins, int opaque, > + int gpu_copy); > > int ff_qsv_find_surface_idx(QSVFramesContext *ctx, QSVFrame *frame); > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index > 
9299596e33..a947583702 100644 > --- a/libavcodec/qsvdec.c > +++ b/libavcodec/qsvdec.c > @@ -34,9 +34,11 @@ > #include "libavutil/pixdesc.h" > #include "libavutil/pixfmt.h" > #include "libavutil/time.h" > +#include "libavutil/imgutils.h" > > #include "avcodec.h" > #include "internal.h" > +#include "decode.h" > #include "qsv.h" > #include "qsv_internal.h" > #include "qsvdec.h" > @@ -54,6 +56,31 @@ const AVCodecHWConfigInternal *ff_qsv_hw_configs[] = { > NULL > }; > > +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, AVFrame > +*frame, AVBufferPool *pool) { > + int ret = 0; > + > + ff_decode_frame_props(avctx, frame); > + > + frame->width = avctx->width; > + frame->height = avctx->height; > + frame->linesize[0] = FFALIGN(avctx->width, 128); > + frame->linesize[1] = frame->linesize[0]; > + frame->buf[0] = av_buffer_pool_get(pool); > + if (!frame->buf[0]) > + return AVERROR(ENOMEM); > + > + frame->data[0] = frame->buf[0]->data; > + frame->data[1] = frame->data[0] + > + frame->linesize[0] * FFALIGN(avctx->height, > + 64); > + > + ret = ff_attach_decode_data(frame); Could you please explain why this function is needed? I don't see that private_ref is needed for qsv decoding. 
> + if (ret < 0) > + return ret; > + > + return 0; > +} > + > static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession > session, > AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref) > { @@ -74,7 +101,8 @@ static int qsv_init_session(AVCodecContext *avctx, > QSVContext *q, mfxSession ses > > ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session, > &q->frames_ctx, q->load_plugins, > - q->iopattern == > MFX_IOPATTERN_OUT_OPAQUE_MEMORY); > + q->iopattern == > MFX_IOPATTERN_OUT_OPAQUE_MEMORY, > + q->gpu_copy); > if (ret < 0) { > av_buffer_unref(&q->frames_ctx.hw_frames_ctx); > return ret; > @@ -88,7 +116,7 @@ static int qsv_init_session(AVCodecContext *avctx, > QSVContext *q, mfxSession ses > } > > ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session, > - hw_device_ref, q->load_plugins); > + hw_device_ref, > + q->load_plugins, q->gpu_copy); > if (ret < 0) > return ret; > > @@ -96,7 +124,7 @@ static int qsv_init_session(AVCodecContext *avctx, > QSVContext *q, mfxSession ses > } else { > if (!q->internal_qs.session) { > ret = ff_qsv_init_internal_session(avctx, &q->internal_qs, > - q->load_plugins); > + q->load_plugins, > + q->gpu_copy); > if (ret < 0) > return ret; > } > @@ -229,6 +257,9 @@ static int qsv_decode_init(AVCodecContext *avctx, > QSVContext *q, mfxVideoParam * > > q->frame_info = param->mfx.FrameInfo; > > + if (!avctx->hw_frames_ctx) > + q->pool = av_buffer_pool_init(av_image_get_buffer_size(avctx->pix_fmt, > + FFALIGN(avctx->width, 128), FFALIGN(avctx->height, > + 64), 1), av_buffer_allocz); > return 0; > } > > @@ -275,9 +306,15 @@ static int alloc_frame(AVCodecContext *avctx, > QSVContext *q, QSVFrame *frame) { > int ret; > > - ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF); > - if (ret < 0) > - return ret; > + if (!q->pool) { > + ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF); > + if (ret < 0) > + return ret; > + } else { > + ret = ff_qsv_get_continuous_buffer(avctx, 
frame->frame, q->pool); > + if (ret < 0) > + return ret; > + } > > if (frame->frame->format == AV_PIX_FMT_QSV) { > frame->surface = *(mfxFrameSurface1*)frame->frame->data[3]; > @@ -535,6 +572,7 @@ int ff_qsv_decode_close(QSVContext *q) > > av_buffer_unref(&q->frames_ctx.hw_frames_ctx); > av_buffer_unref(&q->frames_ctx.mids_buf); > + av_buffer_pool_uninit(&q->pool); > > return 0; > } > diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h index > 64dc8d2f47..dec1f61ceb 100644 > --- a/libavcodec/qsvdec.h > +++ b/libavcodec/qsvdec.h > @@ -59,12 +59,14 @@ typedef struct QSVContext { > enum AVPixelFormat orig_pix_fmt; > uint32_t fourcc; > mfxFrameInfo frame_info; > + AVBufferPool *pool; > > int initialized; > > // options set by the caller > int async_depth; > int iopattern; > + int gpu_copy; > > char *load_plugins; > > diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c index > eb1dc336a4..d7ac00689a 100644 > --- a/libavcodec/qsvdec_h2645.c > +++ b/libavcodec/qsvdec_h2645.c > @@ -193,6 +193,11 @@ static const AVOption hevc_options[] = { > > { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load in an > internal session", > OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD }, > + > + { "gpu_copy", "A GPU-accelerated memory copy between video and system > memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = > MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, > "gpu_copy"}, > + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, > + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, > 0, 0, VD, "gpu_copy"}, > + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, > 0, 0, VD, "gpu_copy"}, > { NULL }, > }; > > @@ -228,6 +233,11 @@ AVCodec ff_hevc_qsv_decoder = { #if > CONFIG_H264_QSV_DECODER static const AVOption options[] = { > { "async_depth", "Internal parallelization depth, the higher the value the higher > the latency.", 
OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = > ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD }, > + > + { "gpu_copy", "A GPU-accelerated copy between video and system memory", > OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, > MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"}, > + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, > + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, 0, > 0, VD, "gpu_copy"}, > + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, 0, > 0, VD, "gpu_copy"}, > { NULL }, > }; > > diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c index > b044c11540..13b1a99113 100644 > --- a/libavcodec/qsvdec_other.c > +++ b/libavcodec/qsvdec_other.c > @@ -181,6 +181,11 @@ static void qsv_decode_flush(AVCodecContext *avctx) > #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM > static const AVOption options[] = { > { "async_depth", "Internal parallelization depth, the higher the value the higher > the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = > ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD }, > + > + { "gpu_copy", "A GPU-accelerated memory copy between video and system > memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = > MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, > "gpu_copy"}, > + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = > MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, > + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, 0, > 0, VD, "gpu_copy"}, > + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, 0, > 0, VD, "gpu_copy"}, > { NULL }, > }; > > diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index > 207cdc1d61..ba85d645ca 100644 > --- a/libavcodec/qsvenc.c > +++ b/libavcodec/qsvenc.c > @@ -956,7 +956,8 @@ static int qsvenc_init_session(AVCodecContext *avctx, > QSVEncContext *q) > > ret = ff_qsv_init_session_frames(avctx, 
&q->internal_qs.session, > &q->frames_ctx, q->load_plugins, > - q->param.IOPattern == > MFX_IOPATTERN_IN_OPAQUE_MEMORY); > + q->param.IOPattern == > MFX_IOPATTERN_IN_OPAQUE_MEMORY, > + MFX_GPUCOPY_OFF); > if (ret < 0) { > av_buffer_unref(&q->frames_ctx.hw_frames_ctx); > return ret; > @@ -965,14 +966,15 @@ static int qsvenc_init_session(AVCodecContext *avctx, > QSVEncContext *q) > q->session = q->internal_qs.session; > } else if (avctx->hw_device_ctx) { > ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session, > - avctx->hw_device_ctx, q->load_plugins); > + avctx->hw_device_ctx, q->load_plugins, > + MFX_GPUCOPY_OFF); > if (ret < 0) > return ret; > > q->session = q->internal_qs.session; > } else { > ret = ff_qsv_init_internal_session(avctx, &q->internal_qs, > - q->load_plugins); > + q->load_plugins, > + MFX_GPUCOPY_OFF); > if (ret < 0) > return ret; > > -- > 2.17.1
> -----Original Message----- > From: Li, Zhong <zhong.li@intel.com> > Sent: Sunday, September 29, 2019 11:57 > To: FFmpeg development discussions and patches <ffmpeg- > devel@ffmpeg.org> > Cc: ChaoX A Liu <chaox.a.liu@intel.com>; Fu, Linjie <linjie.fu@intel.com> > Subject: RE: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated > memory copy support > > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of > Linjie Fu > > Sent: Friday, September 27, 2019 1:47 PM > > To: ffmpeg-devel@ffmpeg.org > > Cc: ChaoX A Liu <chaox.a.liu@intel.com>; Fu, Linjie <linjie.fu@intel.com> > > Subject: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated > memory > > copy support > > > > GPU copy enables or disables GPU accelerated copying between video and > > system memory. This may lead to a notable performance improvement. > > Memory must be sequent and aligned with 128x64. > > (first introduced in FFmpeg 3.3.1) > > This line should be removed. FFmpeg 3.3.1 mainline never support GPU copy. > Double confirmed and removed. > > > > CMD: > > ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv > > -gpu_copy on -i input.h264 -f null - > > or: > > ffmpeg -c:v h264_qsv -gpu_copy on -i input.h264 -f null - > > > > Signed-off-by: Linjie Fu <linjie.fu@intel.com> > > Signed-off-by: ChaoX A Liu <chaox.a.liu@intel.com> > > --- > > Rebased and send again. 
> > > > libavcodec/qsv.c | 31 +++++++++++++++++------- > > libavcodec/qsv_internal.h | 7 +++--- > > libavcodec/qsvdec.c | 50 ++++++++++++++++++++++++++++++++++-- > --- > > libavcodec/qsvdec.h | 2 ++ > > libavcodec/qsvdec_h2645.c | 10 ++++++++ libavcodec/qsvdec_other.c | 5 > ++++ > > libavcodec/qsvenc.c | 8 ++++--- > > 7 files changed, 92 insertions(+), 21 deletions(-) > > > > diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index > 994c9ebcb0..9e66fbc9da > > 100644 > > --- a/libavcodec/qsv.c > > +++ b/libavcodec/qsv.c > > @@ -412,15 +412,19 @@ static int > ff_qsv_set_display_handle(AVCodecContext > > *avctx, QSVSession *qs) #endif > //AVCODEC_QSV_LINUX_SESSION_HANDLE > > > > int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs, > > - const char *load_plugins) > > + const char *load_plugins, int > > + gpu_copy) > > { > > - mfxIMPL impl = MFX_IMPL_AUTO_ANY; > > - mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } }; > > + mfxIMPL impl = MFX_IMPL_AUTO_ANY; > > + mfxVersion ver = { { QSV_VERSION_MINOR, > QSV_VERSION_MAJOR } }; > > + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; > > > > const char *desc; > > int ret; > > > > - ret = MFXInit(impl, &ver, &qs->session); > > + init_par.GPUCopy = gpu_copy; > > GPUCopy field is introduced from API 1.16, would better to check it to avoid > compile issue with old API. > > @@ -802,8 +811,12 @@ int ff_qsv_init_session_frames(AVCodecContext > *avctx, > > mfxSession *psession, > > > > int ret; > > > > + if (gpu_copy == MFX_GPUCOPY_ON) > > + av_log(avctx, AV_LOG_WARNING, "GPU-accelerated memory copy " > > + "only works in > > + MFX_IOPATTERN_OUT_SYSTEM_MEMORY.\n"); > > This looks weird: > 1. the waring log will always turn on if gpu_copy is true no matter what > iopattern. > So would be better: > if (gpu_copy == MFX_GPUCOPY_ON && iopattern != system memory) > print a warning. > > 2. 
It is only added for ff_qsv_init_session_frames(), but looks like should be > apply for qsv_init_session() > Thanks, will update and resend the patch soon. > > > > +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, > AVFrame > > +*frame, AVBufferPool *pool) { > > + int ret = 0; > > + > > + ff_decode_frame_props(avctx, frame); > > + > > + frame->width = avctx->width; > > + frame->height = avctx->height; > > + frame->linesize[0] = FFALIGN(avctx->width, 128); > > + frame->linesize[1] = frame->linesize[0]; > > + frame->buf[0] = av_buffer_pool_get(pool); > > + if (!frame->buf[0]) > > + return AVERROR(ENOMEM); > > + > > + frame->data[0] = frame->buf[0]->data; > > + frame->data[1] = frame->data[0] + > > + frame->linesize[0] * FFALIGN(avctx->height, > > + 64); > > + > > + ret = ff_attach_decode_data(frame); > > Could you please explain why need this function? I don't see private_ref is > needed from qsv decoding. private_ref is required if a decoder declares the capability of AV_CODEC_CAP_DR1. https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/decode.c#L654 - linjie
> > > +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, > > AVFrame > > > +*frame, AVBufferPool *pool) { > > > + int ret = 0; > > > + > > > + ff_decode_frame_props(avctx, frame); > > > + > > > + frame->width = avctx->width; > > > + frame->height = avctx->height; > > > + frame->linesize[0] = FFALIGN(avctx->width, 128); > > > + frame->linesize[1] = frame->linesize[0]; > > > + frame->buf[0] = av_buffer_pool_get(pool); > > > + if (!frame->buf[0]) > > > + return AVERROR(ENOMEM); > > > + > > > + frame->data[0] = frame->buf[0]->data; > > > + frame->data[1] = frame->data[0] + > > > + frame->linesize[0] * > > > + FFALIGN(avctx->height, 64); > > > + > > > + ret = ff_attach_decode_data(frame); > > > > Could you please explain why need this function? I don't see > > private_ref is needed from qsv decoding. > > private_ref is required if a decoder declares the capability of > AV_CODEC_CAP_DR1. > https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/decode.c#L654 > > - Linjie OK, for consistency this is fine, even though get_buffer() is not called in the path of ff_qsv_get_continuous_buffer().
diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index 994c9ebcb0..9e66fbc9da 100644 --- a/libavcodec/qsv.c +++ b/libavcodec/qsv.c @@ -412,15 +412,19 @@ static int ff_qsv_set_display_handle(AVCodecContext *avctx, QSVSession *qs) #endif //AVCODEC_QSV_LINUX_SESSION_HANDLE int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs, - const char *load_plugins) + const char *load_plugins, int gpu_copy) { - mfxIMPL impl = MFX_IMPL_AUTO_ANY; - mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } }; + mfxIMPL impl = MFX_IMPL_AUTO_ANY; + mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } }; + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; const char *desc; int ret; - ret = MFXInit(impl, &ver, &qs->session); + init_par.GPUCopy = gpu_copy; + init_par.Implementation = impl; + init_par.Version = ver; + ret = MFXInitEx(init_par, &qs->session); if (ret < 0) return ff_qsv_print_error(avctx, ret, "Error initializing an internal MFX session"); @@ -712,7 +716,8 @@ static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl) } int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, - AVBufferRef *device_ref, const char *load_plugins) + AVBufferRef *device_ref, const char *load_plugins, + int gpu_copy) { static const mfxHandleType handle_types[] = { MFX_HANDLE_VA_DISPLAY, @@ -722,11 +727,12 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)device_ref->data; AVQSVDeviceContext *device_hwctx = device_ctx->hwctx; mfxSession parent_session = device_hwctx->session; + mfxInitParam init_par = { MFX_IMPL_AUTO_ANY }; + mfxHDL handle = NULL; mfxSession session; mfxVersion ver; mfxIMPL impl; - mfxHDL handle = NULL; mfxHandleType handle_type; mfxStatus err; @@ -752,7 +758,10 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, "from the session\n"); } - err = MFXInit(impl, &ver, &session); + init_par.GPUCopy = gpu_copy; + 
init_par.Implementation = impl; + init_par.Version = ver; + err = MFXInitEx(init_par, &session); if (err != MFX_ERR_NONE) return ff_qsv_print_error(avctx, err, "Error initializing a child MFX session"); @@ -783,7 +792,7 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession, QSVFramesContext *qsv_frames_ctx, - const char *load_plugins, int opaque) + const char *load_plugins, int opaque, int gpu_copy) { mfxFrameAllocator frame_allocator = { .pthis = qsv_frames_ctx, @@ -802,8 +811,12 @@ int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession, int ret; + if (gpu_copy == MFX_GPUCOPY_ON) + av_log(avctx, AV_LOG_WARNING, "GPU-accelerated memory copy " + "only works in MFX_IOPATTERN_OUT_SYSTEM_MEMORY.\n"); + ret = ff_qsv_init_session_device(avctx, &session, - frames_ctx->device_ref, load_plugins); + frames_ctx->device_ref, load_plugins, gpu_copy); if (ret < 0) return ret; diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h index 8b44a9b6f4..37559270e5 100644 --- a/libavcodec/qsv_internal.h +++ b/libavcodec/qsv_internal.h @@ -127,16 +127,17 @@ enum AVPictureType ff_qsv_map_pictype(int mfx_pic_type); enum AVFieldOrder ff_qsv_map_picstruct(int mfx_pic_struct); int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs, - const char *load_plugins); + const char *load_plugins, int gpu_copy); int ff_qsv_close_internal_session(QSVSession *qs); int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, - AVBufferRef *device_ref, const char *load_plugins); + AVBufferRef *device_ref, const char *load_plugins, + int gpu_copy); int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *session, QSVFramesContext *qsv_frames_ctx, - const char *load_plugins, int opaque); + const char *load_plugins, int opaque, int gpu_copy); int ff_qsv_find_surface_idx(QSVFramesContext *ctx, QSVFrame *frame); diff --git 
a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index 9299596e33..a947583702 100644 --- a/libavcodec/qsvdec.c +++ b/libavcodec/qsvdec.c @@ -34,9 +34,11 @@ #include "libavutil/pixdesc.h" #include "libavutil/pixfmt.h" #include "libavutil/time.h" +#include "libavutil/imgutils.h" #include "avcodec.h" #include "internal.h" +#include "decode.h" #include "qsv.h" #include "qsv_internal.h" #include "qsvdec.h" @@ -54,6 +56,31 @@ const AVCodecHWConfigInternal *ff_qsv_hw_configs[] = { NULL }; +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, AVFrame *frame, AVBufferPool *pool) +{ + int ret = 0; + + ff_decode_frame_props(avctx, frame); + + frame->width = avctx->width; + frame->height = avctx->height; + frame->linesize[0] = FFALIGN(avctx->width, 128); + frame->linesize[1] = frame->linesize[0]; + frame->buf[0] = av_buffer_pool_get(pool); + if (!frame->buf[0]) + return AVERROR(ENOMEM); + + frame->data[0] = frame->buf[0]->data; + frame->data[1] = frame->data[0] + + frame->linesize[0] * FFALIGN(avctx->height, 64); + + ret = ff_attach_decode_data(frame); + if (ret < 0) + return ret; + + return 0; +} + static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession session, AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref) { @@ -74,7 +101,8 @@ static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session, &q->frames_ctx, q->load_plugins, - q->iopattern == MFX_IOPATTERN_OUT_OPAQUE_MEMORY); + q->iopattern == MFX_IOPATTERN_OUT_OPAQUE_MEMORY, + q->gpu_copy); if (ret < 0) { av_buffer_unref(&q->frames_ctx.hw_frames_ctx); return ret; @@ -88,7 +116,7 @@ static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses } ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session, - hw_device_ref, q->load_plugins); + hw_device_ref, q->load_plugins, q->gpu_copy); if (ret < 0) return ret; @@ -96,7 +124,7 @@ static int qsv_init_session(AVCodecContext *avctx, 
QSVContext *q, mfxSession ses } else { if (!q->internal_qs.session) { ret = ff_qsv_init_internal_session(avctx, &q->internal_qs, - q->load_plugins); + q->load_plugins, q->gpu_copy); if (ret < 0) return ret; } @@ -229,6 +257,9 @@ static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, mfxVideoParam * q->frame_info = param->mfx.FrameInfo; + if (!avctx->hw_frames_ctx) + q->pool = av_buffer_pool_init(av_image_get_buffer_size(avctx->pix_fmt, + FFALIGN(avctx->width, 128), FFALIGN(avctx->height, 64), 1), av_buffer_allocz); return 0; } @@ -275,9 +306,15 @@ static int alloc_frame(AVCodecContext *avctx, QSVContext *q, QSVFrame *frame) { int ret; - ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF); - if (ret < 0) - return ret; + if (!q->pool) { + ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF); + if (ret < 0) + return ret; + } else { + ret = ff_qsv_get_continuous_buffer(avctx, frame->frame, q->pool); + if (ret < 0) + return ret; + } if (frame->frame->format == AV_PIX_FMT_QSV) { frame->surface = *(mfxFrameSurface1*)frame->frame->data[3]; @@ -535,6 +572,7 @@ int ff_qsv_decode_close(QSVContext *q) av_buffer_unref(&q->frames_ctx.hw_frames_ctx); av_buffer_unref(&q->frames_ctx.mids_buf); + av_buffer_pool_uninit(&q->pool); return 0; } diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h index 64dc8d2f47..dec1f61ceb 100644 --- a/libavcodec/qsvdec.h +++ b/libavcodec/qsvdec.h @@ -59,12 +59,14 @@ typedef struct QSVContext { enum AVPixelFormat orig_pix_fmt; uint32_t fourcc; mfxFrameInfo frame_info; + AVBufferPool *pool; int initialized; // options set by the caller int async_depth; int iopattern; + int gpu_copy; char *load_plugins; diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c index eb1dc336a4..d7ac00689a 100644 --- a/libavcodec/qsvdec_h2645.c +++ b/libavcodec/qsvdec_h2645.c @@ -193,6 +193,11 @@ static const AVOption hevc_options[] = { { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load in an internal 
session", OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD }, + + { "gpu_copy", "A GPU-accelerated memory copy between video and system memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"}, + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, 0, 0, VD, "gpu_copy"}, + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, 0, 0, VD, "gpu_copy"}, { NULL }, }; @@ -228,6 +233,11 @@ AVCodec ff_hevc_qsv_decoder = { #if CONFIG_H264_QSV_DECODER static const AVOption options[] = { { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD }, + + { "gpu_copy", "A GPU-accelerated copy between video and system memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"}, + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, 0, 0, VD, "gpu_copy"}, + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, 0, 0, VD, "gpu_copy"}, { NULL }, }; diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c index b044c11540..13b1a99113 100644 --- a/libavcodec/qsvdec_other.c +++ b/libavcodec/qsvdec_other.c @@ -181,6 +181,11 @@ static void qsv_decode_flush(AVCodecContext *avctx) #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD }, + + { "gpu_copy", "A GPU-accelerated memory copy between video and system memory", 
OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"}, + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"}, + { "on", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON }, 0, 0, VD, "gpu_copy"}, + { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF }, 0, 0, VD, "gpu_copy"}, { NULL }, }; diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 207cdc1d61..ba85d645ca 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -956,7 +956,8 @@ static int qsvenc_init_session(AVCodecContext *avctx, QSVEncContext *q) ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session, &q->frames_ctx, q->load_plugins, - q->param.IOPattern == MFX_IOPATTERN_IN_OPAQUE_MEMORY); + q->param.IOPattern == MFX_IOPATTERN_IN_OPAQUE_MEMORY, + MFX_GPUCOPY_OFF); if (ret < 0) { av_buffer_unref(&q->frames_ctx.hw_frames_ctx); return ret; @@ -965,14 +966,15 @@ static int qsvenc_init_session(AVCodecContext *avctx, QSVEncContext *q) q->session = q->internal_qs.session; } else if (avctx->hw_device_ctx) { ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session, - avctx->hw_device_ctx, q->load_plugins); + avctx->hw_device_ctx, q->load_plugins, + MFX_GPUCOPY_OFF); if (ret < 0) return ret; q->session = q->internal_qs.session; } else { ret = ff_qsv_init_internal_session(avctx, &q->internal_qs, - q->load_plugins); + q->load_plugins, MFX_GPUCOPY_OFF); if (ret < 0) return ret;