Message ID | 20210408070929.860244-1-haihao.xiang@intel.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/3] lavc/qsv: apply AVCodecContext AVOption -threads to QSV | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
Hi Haihao, On Thu, Apr 8, 2021 at 3:10 PM Haihao Xiang <haihao.xiang@intel.com> wrote: > > By default the SDK creates a thread for each CPU when creating a mfx > session for decoding / encoding, which results in CPU overhead on a > multi CPU system. Actually creating 2 threads is a better choice for > most cases in practice. > > This patch allows user to specify the number of threads created for a > mfx session via option -threads. If the number is not specified, 2 > threads will be created by default. > > Note the SDK requires at least 2 threads to avoid dead locks[1] > > [1]https://github.com/Intel-Media-SDK/MediaSDK/blob/master/_studio/mfx_lib/scheduler/linux/src/mfx_scheduler_core_ischeduler.cpp#L90-L93 > --- Giving users the option to specify the number of threads looks reasonable to me, and decreasing the CPU overhead makes sense for a HW encoding pipeline. I'm also curious about the tradeoff of decreasing the thread number to 2. Would performance or something else drop? - linjie
On Sat, 2021-04-10 at 13:32 +0800, Linjie Fu wrote: > Hi Haihao, > > On Thu, Apr 8, 2021 at 3:10 PM Haihao Xiang <haihao.xiang@intel.com> wrote: > > > > By default the SDK creates a thread for each CPU when creating a mfx > > session for decoding / encoding, which results in CPU overhead on a > > multi CPU system. Actually creating 2 threads is a better choice for > > most cases in practice. > > > > This patch allows user to specify the number of threads created for a > > mfx session via option -threads. If the number is not specified, 2 > > threads will be created by default. > > > > Note the SDK requires at least 2 threads to avoid dead locks[1] > > > > [1] > > https://github.com/Intel-Media-SDK/MediaSDK/blob/master/_studio/mfx_lib/scheduler/linux/src/mfx_scheduler_core_ischeduler.cpp#L90-L93 > > --- > > Optional choice for users to specify the thread number looks reasonable to me, > and decreasing the CPU overhead makes sense for HW encoding pipeline. > > Also curious about what's the tradeoff of decreasing the thread number to 2. > Would the performance or something else drop? Thanks for the comment. MSDK threads are used to execute MSDK tasks. For a HW decoding/encoding pipeline, these tasks are very light, so we can use just a few threads for the MSDK tasks. I didn't see any performance drop in my testing after applying this patch. Regards, Haihao > > - linjie
diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index 6e3154e1a3..c725883c5c 100644 --- a/libavcodec/qsv.c +++ b/libavcodec/qsv.c @@ -390,11 +390,27 @@ int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs, const char *desc; int ret; +#if QSV_VERSION_ATLEAST(1, 15) + mfxExtBuffer *ext_params[1]; + mfxExtThreadsParam thread_param; +#endif + #if QSV_VERSION_ATLEAST(1, 16) init_par.GPUCopy = gpu_copy; #endif init_par.Implementation = impl; init_par.Version = ver; + +#if QSV_VERSION_ATLEAST(1, 15) + memset(&thread_param, 0, sizeof(thread_param)); + thread_param.Header.BufferId = MFX_EXTBUFF_THREADS_PARAM; + thread_param.Header.BufferSz = sizeof(thread_param); + thread_param.NumThread = FFMAX(2, avctx->thread_count); + ext_params[0] = (mfxExtBuffer *)&thread_param; + init_par.ExtParam = (mfxExtBuffer **)&ext_params; + init_par.NumExtParam = 1; +#endif + ret = MFXInitEx(init_par, &qs->session); if (ret < 0) return ff_qsv_print_error(avctx, ret, @@ -709,6 +725,11 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, int i, ret; +#if QSV_VERSION_ATLEAST(1, 15) + mfxExtBuffer *ext_params[1]; + mfxExtThreadsParam thread_param; +#endif + err = MFXQueryIMPL(parent_session, &impl); if (err == MFX_ERR_NONE) err = MFXQueryVersion(parent_session, &ver); @@ -734,6 +755,17 @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession, #endif init_par.Implementation = impl; init_par.Version = ver; + +#if QSV_VERSION_ATLEAST(1, 15) + memset(&thread_param, 0, sizeof(thread_param)); + thread_param.Header.BufferId = MFX_EXTBUFF_THREADS_PARAM; + thread_param.Header.BufferSz = sizeof(thread_param); + thread_param.NumThread = FFMAX(2, avctx->thread_count); + ext_params[0] = (mfxExtBuffer *)&thread_param; + init_par.ExtParam = (mfxExtBuffer **)&ext_params; + init_par.NumExtParam = 1; +#endif + err = MFXInitEx(init_par, &session); if (err != MFX_ERR_NONE) return ff_qsv_print_error(avctx, err, diff --git a/libavcodec/qsvdec.c 
b/libavcodec/qsvdec.c index 5f2e641373..d3365b6f3b 100644 --- a/libavcodec/qsvdec.c +++ b/libavcodec/qsvdec.c @@ -846,6 +846,7 @@ AVCodec ff_##x##_qsv_decoder = { \ .close = qsv_decode_close, \ .bsfs = bsf_name, \ .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HYBRID, \ + .caps_internal = FF_CODEC_CAP_AUTO_THREADS, \ .priv_class = &x##_qsv_class, \ .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12, \ AV_PIX_FMT_P010, \ diff --git a/libavcodec/qsvenc_h264.c b/libavcodec/qsvenc_h264.c index ddafc45ec3..fb587ff87c 100644 --- a/libavcodec/qsvenc_h264.c +++ b/libavcodec/qsvenc_h264.c @@ -196,7 +196,7 @@ AVCodec ff_h264_qsv_encoder = { AV_PIX_FMT_NONE }, .priv_class = &class, .defaults = qsv_enc_defaults, - .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_AUTO_THREADS, .wrapper_name = "qsv", .hw_configs = ff_qsv_enc_hw_configs, }; diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c index 347f30655e..a9e5906309 100644 --- a/libavcodec/qsvenc_hevc.c +++ b/libavcodec/qsvenc_hevc.c @@ -289,7 +289,7 @@ AVCodec ff_hevc_qsv_encoder = { AV_PIX_FMT_NONE }, .priv_class = &class, .defaults = qsv_enc_defaults, - .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_AUTO_THREADS, .wrapper_name = "qsv", .hw_configs = ff_qsv_enc_hw_configs, }; diff --git a/libavcodec/qsvenc_jpeg.c b/libavcodec/qsvenc_jpeg.c index f76af9486b..c0af8ad5b4 100644 --- a/libavcodec/qsvenc_jpeg.c +++ b/libavcodec/qsvenc_jpeg.c @@ -94,6 +94,7 @@ AVCodec ff_mjpeg_qsv_encoder = { AV_PIX_FMT_NONE }, .priv_class = &class, .defaults = qsv_enc_defaults, + .caps_internal = FF_CODEC_CAP_AUTO_THREADS, .wrapper_name = "qsv", .hw_configs = ff_qsv_enc_hw_configs, }; diff --git a/libavcodec/qsvenc_mpeg2.c b/libavcodec/qsvenc_mpeg2.c index 0e34bb75dc..e53e42c812 100644 --- a/libavcodec/qsvenc_mpeg2.c +++ b/libavcodec/qsvenc_mpeg2.c @@ -110,7 +110,7 
@@ AVCodec ff_mpeg2_qsv_encoder = { AV_PIX_FMT_NONE }, .priv_class = &class, .defaults = qsv_enc_defaults, - .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_AUTO_THREADS, .wrapper_name = "qsv", .hw_configs = ff_qsv_enc_hw_configs, }; diff --git a/libavcodec/qsvenc_vp9.c b/libavcodec/qsvenc_vp9.c index ce44c09397..eae100b18b 100644 --- a/libavcodec/qsvenc_vp9.c +++ b/libavcodec/qsvenc_vp9.c @@ -108,7 +108,7 @@ AVCodec ff_vp9_qsv_encoder = { AV_PIX_FMT_NONE }, .priv_class = &class, .defaults = qsv_enc_defaults, - .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_AUTO_THREADS, .wrapper_name = "qsv", .hw_configs = ff_qsv_enc_hw_configs, };