diff mbox

[FFmpeg-devel,v2] lavc/qsvdec: Add GPU-accelerated memory copy support

Message ID 20190927054634.32553-1-linjie.fu@intel.com
State Superseded
Headers show

Commit Message

Fu, Linjie Sept. 27, 2019, 5:46 a.m. UTC
GPU copy enables or disables GPU accelerated copying between video
and system memory. This may lead to a notable performance improvement.
Memory must be contiguous and aligned to 128x64.
(first introduced in FFmpeg 3.3.1)

CMD:
ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv
                    -gpu_copy on -i input.h264 -f null -
or:
ffmpeg -c:v h264_qsv -gpu_copy on -i input.h264 -f null -

Signed-off-by: Linjie Fu <linjie.fu@intel.com>
Signed-off-by: ChaoX A Liu <chaox.a.liu@intel.com>
---
Rebased and sent again.

 libavcodec/qsv.c          | 31 +++++++++++++++++-------
 libavcodec/qsv_internal.h |  7 +++---
 libavcodec/qsvdec.c       | 50 ++++++++++++++++++++++++++++++++++-----
 libavcodec/qsvdec.h       |  2 ++
 libavcodec/qsvdec_h2645.c | 10 ++++++++
 libavcodec/qsvdec_other.c |  5 ++++
 libavcodec/qsvenc.c       |  8 ++++---
 7 files changed, 92 insertions(+), 21 deletions(-)

Comments

Zhong Li Sept. 29, 2019, 3:57 a.m. UTC | #1
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Linjie Fu

> Sent: Friday, September 27, 2019 1:47 PM

> To: ffmpeg-devel@ffmpeg.org

> Cc: ChaoX A Liu <chaox.a.liu@intel.com>; Fu, Linjie <linjie.fu@intel.com>

> Subject: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated memory

> copy support

> 

> GPU copy enables or disables GPU accelerated copying between video and

> system memory. This may lead to a notable performance improvement.

> Memory must be sequent and aligned with 128x64.

> (first introduced in FFmpeg 3.3.1)


This line should be removed. FFmpeg 3.3.1 mainline never supported GPU copy.

> 

> CMD:

> ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv

>                     -gpu_copy on -i input.h264 -f null -

> or:

> ffmpeg -c:v h264_qsv -gpu_copy on -i input.h264 -f null -

> 

> Signed-off-by: Linjie Fu <linjie.fu@intel.com>

> Signed-off-by: ChaoX A Liu <chaox.a.liu@intel.com>

> ---

> Rebased and send again.

> 

>  libavcodec/qsv.c          | 31 +++++++++++++++++-------

>  libavcodec/qsv_internal.h |  7 +++---

>  libavcodec/qsvdec.c       | 50 ++++++++++++++++++++++++++++++++++-----

>  libavcodec/qsvdec.h       |  2 ++

>  libavcodec/qsvdec_h2645.c | 10 ++++++++  libavcodec/qsvdec_other.c |  5 ++++

>  libavcodec/qsvenc.c       |  8 ++++---

>  7 files changed, 92 insertions(+), 21 deletions(-)

> 

> diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index 994c9ebcb0..9e66fbc9da

> 100644

> --- a/libavcodec/qsv.c

> +++ b/libavcodec/qsv.c

> @@ -412,15 +412,19 @@ static int ff_qsv_set_display_handle(AVCodecContext

> *avctx, QSVSession *qs)  #endif //AVCODEC_QSV_LINUX_SESSION_HANDLE

> 

>  int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,

> -                                 const char *load_plugins)

> +                                 const char *load_plugins, int

> + gpu_copy)

>  {

> -    mfxIMPL impl   = MFX_IMPL_AUTO_ANY;

> -    mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };

> +    mfxIMPL          impl = MFX_IMPL_AUTO_ANY;

> +    mfxVersion        ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };

> +    mfxInitParam init_par = { MFX_IMPL_AUTO_ANY };

> 

>      const char *desc;

>      int ret;

> 

> -    ret = MFXInit(impl, &ver, &qs->session);

> +    init_par.GPUCopy        = gpu_copy;


The GPUCopy field was introduced in API 1.16; it would be better to check for it to avoid compile issues with an older API.

> +    init_par.Implementation = impl;

> +    init_par.Version        = ver;

> +    ret = MFXInitEx(init_par, &qs->session);

>      if (ret < 0)

>          return ff_qsv_print_error(avctx, ret,

>                                    "Error initializing an internal MFX session"); @@ -712,7

> +716,8 @@ static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid,

> mfxHDL *hdl)  }

> 

>  int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,

> -                               AVBufferRef *device_ref, const char *load_plugins)

> +                               AVBufferRef *device_ref, const char *load_plugins,

> +                               int gpu_copy)

>  {

>      static const mfxHandleType handle_types[] = {

>          MFX_HANDLE_VA_DISPLAY,

> @@ -722,11 +727,12 @@ int ff_qsv_init_session_device(AVCodecContext

> *avctx, mfxSession *psession,

>      AVHWDeviceContext    *device_ctx = (AVHWDeviceContext*)device_ref-

> >data;

>      AVQSVDeviceContext *device_hwctx = device_ctx->hwctx;

>      mfxSession        parent_session = device_hwctx->session;

> +    mfxInitParam            init_par = { MFX_IMPL_AUTO_ANY };

> +    mfxHDL                    handle = NULL;

> 

>      mfxSession    session;

>      mfxVersion    ver;

>      mfxIMPL       impl;

> -    mfxHDL        handle = NULL;

>      mfxHandleType handle_type;

>      mfxStatus err;

> 

> @@ -752,7 +758,10 @@ int ff_qsv_init_session_device(AVCodecContext *avctx,

> mfxSession *psession,

>                 "from the session\n");

>      }

> 

> -    err = MFXInit(impl, &ver, &session);

> +    init_par.GPUCopy        = gpu_copy;

> +    init_par.Implementation = impl;

> +    init_par.Version        = ver;

> +    err = MFXInitEx(init_par, &session);

>      if (err != MFX_ERR_NONE)

>          return ff_qsv_print_error(avctx, err,

>                                    "Error initializing a child MFX session"); @@ -783,7 +792,7

> @@ int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession

> *psession,

> 

>  int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession,

>                                 QSVFramesContext *qsv_frames_ctx,

> -                               const char *load_plugins, int opaque)

> +                               const char *load_plugins, int opaque,

> + int gpu_copy)

>  {

>      mfxFrameAllocator frame_allocator = {

>          .pthis  = qsv_frames_ctx,

> @@ -802,8 +811,12 @@ int ff_qsv_init_session_frames(AVCodecContext *avctx,

> mfxSession *psession,

> 

>      int ret;

> 

> +    if (gpu_copy == MFX_GPUCOPY_ON)

> +        av_log(avctx, AV_LOG_WARNING, "GPU-accelerated memory copy "

> +                                    "only works in

> + MFX_IOPATTERN_OUT_SYSTEM_MEMORY.\n");


This looks weird:
1.  The warning will always be printed if gpu_copy is on, no matter what the iopattern is.
     So it would be better to do:
    if (gpu_copy == MFX_GPUCOPY_ON && iopattern != system memory)
        print a warning. 

2. It is only added to ff_qsv_init_session_frames(), but it looks like it should also apply to qsv_init_session().

>      ret = ff_qsv_init_session_device(avctx, &session,

> -                                     frames_ctx->device_ref, load_plugins);

> +                                     frames_ctx->device_ref,

> + load_plugins, gpu_copy);

>      if (ret < 0)

>          return ret;

> 

> diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h index

> 8b44a9b6f4..37559270e5 100644

> --- a/libavcodec/qsv_internal.h

> +++ b/libavcodec/qsv_internal.h

> @@ -127,16 +127,17 @@ enum AVPictureType ff_qsv_map_pictype(int

> mfx_pic_type);  enum AVFieldOrder ff_qsv_map_picstruct(int mfx_pic_struct);

> 

>  int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,

> -                                 const char *load_plugins);

> +                                 const char *load_plugins, int

> + gpu_copy);

> 

>  int ff_qsv_close_internal_session(QSVSession *qs);

> 

>  int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,

> -                               AVBufferRef *device_ref, const char *load_plugins);

> +                               AVBufferRef *device_ref, const char *load_plugins,

> +                               int gpu_copy);

> 

>  int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *session,

>                                 QSVFramesContext *qsv_frames_ctx,

> -                               const char *load_plugins, int opaque);

> +                               const char *load_plugins, int opaque,

> + int gpu_copy);

> 

>  int ff_qsv_find_surface_idx(QSVFramesContext *ctx, QSVFrame *frame);

> 

> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> 9299596e33..a947583702 100644

> --- a/libavcodec/qsvdec.c

> +++ b/libavcodec/qsvdec.c

> @@ -34,9 +34,11 @@

>  #include "libavutil/pixdesc.h"

>  #include "libavutil/pixfmt.h"

>  #include "libavutil/time.h"

> +#include "libavutil/imgutils.h"

> 

>  #include "avcodec.h"

>  #include "internal.h"

> +#include "decode.h"

>  #include "qsv.h"

>  #include "qsv_internal.h"

>  #include "qsvdec.h"

> @@ -54,6 +56,31 @@ const AVCodecHWConfigInternal *ff_qsv_hw_configs[] = {

>      NULL

>  };

> 

> +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, AVFrame

> +*frame, AVBufferPool *pool) {

> +    int ret = 0;

> +

> +    ff_decode_frame_props(avctx, frame);

> +

> +    frame->width       = avctx->width;

> +    frame->height      = avctx->height;

> +    frame->linesize[0] = FFALIGN(avctx->width, 128);

> +    frame->linesize[1] = frame->linesize[0];

> +    frame->buf[0]      = av_buffer_pool_get(pool);

> +    if (!frame->buf[0])

> +        return AVERROR(ENOMEM);

> +

> +    frame->data[0] = frame->buf[0]->data;

> +    frame->data[1] = frame->data[0] +

> +                            frame->linesize[0] * FFALIGN(avctx->height,

> + 64);

> +

> +    ret = ff_attach_decode_data(frame);


Could you please explain why this function is needed? I don't see that private_ref is needed for qsv decoding.

> +    if (ret < 0)

> +        return ret;

> +

> +    return 0;

> +}

> +

>  static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession

> session,

>                              AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref)

> { @@ -74,7 +101,8 @@ static int qsv_init_session(AVCodecContext *avctx,

> QSVContext *q, mfxSession ses

> 

>          ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session,

>                                           &q->frames_ctx, q->load_plugins,

> -                                         q->iopattern ==

> MFX_IOPATTERN_OUT_OPAQUE_MEMORY);

> +                                         q->iopattern ==

> MFX_IOPATTERN_OUT_OPAQUE_MEMORY,

> +                                         q->gpu_copy);

>          if (ret < 0) {

>              av_buffer_unref(&q->frames_ctx.hw_frames_ctx);

>              return ret;

> @@ -88,7 +116,7 @@ static int qsv_init_session(AVCodecContext *avctx,

> QSVContext *q, mfxSession ses

>          }

> 

>          ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session,

> -                                         hw_device_ref, q->load_plugins);

> +                                         hw_device_ref,

> + q->load_plugins, q->gpu_copy);

>          if (ret < 0)

>              return ret;

> 

> @@ -96,7 +124,7 @@ static int qsv_init_session(AVCodecContext *avctx,

> QSVContext *q, mfxSession ses

>      } else {

>          if (!q->internal_qs.session) {

>              ret = ff_qsv_init_internal_session(avctx, &q->internal_qs,

> -                                               q->load_plugins);

> +                                               q->load_plugins,

> + q->gpu_copy);

>              if (ret < 0)

>                  return ret;

>          }

> @@ -229,6 +257,9 @@ static int qsv_decode_init(AVCodecContext *avctx,

> QSVContext *q, mfxVideoParam *

> 

>      q->frame_info = param->mfx.FrameInfo;

> 

> +    if (!avctx->hw_frames_ctx)

> +        q->pool = av_buffer_pool_init(av_image_get_buffer_size(avctx->pix_fmt,

> +                    FFALIGN(avctx->width, 128), FFALIGN(avctx->height,

> + 64), 1), av_buffer_allocz);

>      return 0;

>  }

> 

> @@ -275,9 +306,15 @@ static int alloc_frame(AVCodecContext *avctx,

> QSVContext *q, QSVFrame *frame)  {

>      int ret;

> 

> -    ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);

> -    if (ret < 0)

> -        return ret;

> +    if (!q->pool) {

> +        ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);

> +        if (ret < 0)

> +            return ret;

> +    } else {

> +        ret = ff_qsv_get_continuous_buffer(avctx, frame->frame, q->pool);

> +        if (ret < 0)

> +            return ret;

> +    }

> 

>      if (frame->frame->format == AV_PIX_FMT_QSV) {

>          frame->surface = *(mfxFrameSurface1*)frame->frame->data[3];

> @@ -535,6 +572,7 @@ int ff_qsv_decode_close(QSVContext *q)

> 

>      av_buffer_unref(&q->frames_ctx.hw_frames_ctx);

>      av_buffer_unref(&q->frames_ctx.mids_buf);

> +    av_buffer_pool_uninit(&q->pool);

> 

>      return 0;

>  }

> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h index

> 64dc8d2f47..dec1f61ceb 100644

> --- a/libavcodec/qsvdec.h

> +++ b/libavcodec/qsvdec.h

> @@ -59,12 +59,14 @@ typedef struct QSVContext {

>      enum AVPixelFormat orig_pix_fmt;

>      uint32_t fourcc;

>      mfxFrameInfo frame_info;

> +    AVBufferPool *pool;

> 

>      int initialized;

> 

>      // options set by the caller

>      int async_depth;

>      int iopattern;

> +    int gpu_copy;

> 

>      char *load_plugins;

> 

> diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c index

> eb1dc336a4..d7ac00689a 100644

> --- a/libavcodec/qsvdec_h2645.c

> +++ b/libavcodec/qsvdec_h2645.c

> @@ -193,6 +193,11 @@ static const AVOption hevc_options[] = {

> 

>      { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load in an

> internal session",

>          OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD },

> +

> +    { "gpu_copy", "A GPU-accelerated memory copy between video and system

> memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 =

> MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD,

> "gpu_copy"},

> +        { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =

> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},

> +        { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },

> 0, 0, VD, "gpu_copy"},

> +        { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },

> 0, 0, VD, "gpu_copy"},

>      { NULL },

>  };

> 

> @@ -228,6 +233,11 @@ AVCodec ff_hevc_qsv_decoder = {  #if

> CONFIG_H264_QSV_DECODER  static const AVOption options[] = {

>      { "async_depth", "Internal parallelization depth, the higher the value the higher

> the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 =

> ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },

> +

> +    { "gpu_copy", "A GPU-accelerated copy between video and system memory",

> OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT },

> MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"},

> +    { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =

> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},

> +    { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },      0,

> 0, VD, "gpu_copy"},

> +    { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },     0,

> 0, VD, "gpu_copy"},

>      { NULL },

>  };

> 

> diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c index

> b044c11540..13b1a99113 100644

> --- a/libavcodec/qsvdec_other.c

> +++ b/libavcodec/qsvdec_other.c

> @@ -181,6 +181,11 @@ static void qsv_decode_flush(AVCodecContext *avctx)

> #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM

> static const AVOption options[] = {

>      { "async_depth", "Internal parallelization depth, the higher the value the higher

> the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 =

> ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },

> +

> +    { "gpu_copy", "A GPU-accelerated memory copy between video and system

> memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 =

> MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD,

> "gpu_copy"},

> +    { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =

> MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},

> +    { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },      0,

> 0, VD, "gpu_copy"},

> +    { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },     0,

> 0, VD, "gpu_copy"},

>      { NULL },

>  };

> 

> diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index

> 207cdc1d61..ba85d645ca 100644

> --- a/libavcodec/qsvenc.c

> +++ b/libavcodec/qsvenc.c

> @@ -956,7 +956,8 @@ static int qsvenc_init_session(AVCodecContext *avctx,

> QSVEncContext *q)

> 

>          ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session,

>                                           &q->frames_ctx, q->load_plugins,

> -                                         q->param.IOPattern ==

> MFX_IOPATTERN_IN_OPAQUE_MEMORY);

> +                                         q->param.IOPattern ==

> MFX_IOPATTERN_IN_OPAQUE_MEMORY,

> +                                         MFX_GPUCOPY_OFF);

>          if (ret < 0) {

>              av_buffer_unref(&q->frames_ctx.hw_frames_ctx);

>              return ret;

> @@ -965,14 +966,15 @@ static int qsvenc_init_session(AVCodecContext *avctx,

> QSVEncContext *q)

>          q->session = q->internal_qs.session;

>      } else if (avctx->hw_device_ctx) {

>          ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session,

> -                                         avctx->hw_device_ctx, q->load_plugins);

> +                                         avctx->hw_device_ctx, q->load_plugins,

> +                                         MFX_GPUCOPY_OFF);

>          if (ret < 0)

>              return ret;

> 

>          q->session = q->internal_qs.session;

>      } else {

>          ret = ff_qsv_init_internal_session(avctx, &q->internal_qs,

> -                                           q->load_plugins);

> +                                           q->load_plugins,

> + MFX_GPUCOPY_OFF);

>          if (ret < 0)

>              return ret;

> 

> --

> 2.17.1
Fu, Linjie Sept. 29, 2019, 7:35 a.m. UTC | #2
> -----Original Message-----

> From: Li, Zhong <zhong.li@intel.com>

> Sent: Sunday, September 29, 2019 11:57

> To: FFmpeg development discussions and patches <ffmpeg-

> devel@ffmpeg.org>

> Cc: ChaoX A Liu <chaox.a.liu@intel.com>; Fu, Linjie <linjie.fu@intel.com>

> Subject: RE: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated

> memory copy support

> 

> > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of

> Linjie Fu

> > Sent: Friday, September 27, 2019 1:47 PM

> > To: ffmpeg-devel@ffmpeg.org

> > Cc: ChaoX A Liu <chaox.a.liu@intel.com>; Fu, Linjie <linjie.fu@intel.com>

> > Subject: [FFmpeg-devel] [PATCH, v2] lavc/qsvdec: Add GPU-accelerated

> memory

> > copy support

> >

> > GPU copy enables or disables GPU accelerated copying between video and

> > system memory. This may lead to a notable performance improvement.

> > Memory must be sequent and aligned with 128x64.

> > (first introduced in FFmpeg 3.3.1)

> 

> This line should be removed. FFmpeg 3.3.1 mainline never support GPU copy.

> 


Double confirmed and removed.

> >

> > CMD:

> > ffmpeg -init_hw_device qsv=hw -filter_hw_device hw -c:v h264_qsv

> >                     -gpu_copy on -i input.h264 -f null -

> > or:

> > ffmpeg -c:v h264_qsv -gpu_copy on -i input.h264 -f null -

> >

> > Signed-off-by: Linjie Fu <linjie.fu@intel.com>

> > Signed-off-by: ChaoX A Liu <chaox.a.liu@intel.com>

> > ---

> > Rebased and send again.

> >

> >  libavcodec/qsv.c          | 31 +++++++++++++++++-------

> >  libavcodec/qsv_internal.h |  7 +++---

> >  libavcodec/qsvdec.c       | 50 ++++++++++++++++++++++++++++++++++--

> ---

> >  libavcodec/qsvdec.h       |  2 ++

> >  libavcodec/qsvdec_h2645.c | 10 ++++++++  libavcodec/qsvdec_other.c |  5

> ++++

> >  libavcodec/qsvenc.c       |  8 ++++---

> >  7 files changed, 92 insertions(+), 21 deletions(-)

> >

> > diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index

> 994c9ebcb0..9e66fbc9da

> > 100644

> > --- a/libavcodec/qsv.c

> > +++ b/libavcodec/qsv.c

> > @@ -412,15 +412,19 @@ static int

> ff_qsv_set_display_handle(AVCodecContext

> > *avctx, QSVSession *qs)  #endif

> //AVCODEC_QSV_LINUX_SESSION_HANDLE

> >

> >  int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,

> > -                                 const char *load_plugins)

> > +                                 const char *load_plugins, int

> > + gpu_copy)

> >  {

> > -    mfxIMPL impl   = MFX_IMPL_AUTO_ANY;

> > -    mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };

> > +    mfxIMPL          impl = MFX_IMPL_AUTO_ANY;

> > +    mfxVersion        ver = { { QSV_VERSION_MINOR,

> QSV_VERSION_MAJOR } };

> > +    mfxInitParam init_par = { MFX_IMPL_AUTO_ANY };

> >

> >      const char *desc;

> >      int ret;

> >

> > -    ret = MFXInit(impl, &ver, &qs->session);

> > +    init_par.GPUCopy        = gpu_copy;

> 

> GPUCopy field is introduced from API 1.16, would better to check it to avoid

> compile issue with old API.


> > @@ -802,8 +811,12 @@ int ff_qsv_init_session_frames(AVCodecContext

> *avctx,

> > mfxSession *psession,

> >

> >      int ret;

> >

> > +    if (gpu_copy == MFX_GPUCOPY_ON)

> > +        av_log(avctx, AV_LOG_WARNING, "GPU-accelerated memory copy "

> > +                                    "only works in

> > + MFX_IOPATTERN_OUT_SYSTEM_MEMORY.\n");

> 

> This looks weird:

> 1.  the waring log will always turn on if gpu_copy is true no matter what

> iopattern.

>      So would be better:

>     if (gpu_copy == MFX_GPUCOPY_ON && iopattern != system memory)

>         print a warning.

> 

> 2. It is only added for ff_qsv_init_session_frames(), but looks like should be

> apply for qsv_init_session()

> 


Thanks, will update and resend the patch soon.

> >

> > +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx,

> AVFrame

> > +*frame, AVBufferPool *pool) {

> > +    int ret = 0;

> > +

> > +    ff_decode_frame_props(avctx, frame);

> > +

> > +    frame->width       = avctx->width;

> > +    frame->height      = avctx->height;

> > +    frame->linesize[0] = FFALIGN(avctx->width, 128);

> > +    frame->linesize[1] = frame->linesize[0];

> > +    frame->buf[0]      = av_buffer_pool_get(pool);

> > +    if (!frame->buf[0])

> > +        return AVERROR(ENOMEM);

> > +

> > +    frame->data[0] = frame->buf[0]->data;

> > +    frame->data[1] = frame->data[0] +

> > +                            frame->linesize[0] * FFALIGN(avctx->height,

> > + 64);

> > +

> > +    ret = ff_attach_decode_data(frame);

> 

> Could you please explain why need this function? I don't see private_ref is

> needed from qsv decoding.


private_ref is required if a decoder declares the capability of AV_CODEC_CAP_DR1.
https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/decode.c#L654

- linjie
Zhong Li Sept. 29, 2019, 7:57 a.m. UTC | #3
> > > +static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx,

> > AVFrame

> > > +*frame, AVBufferPool *pool) {

> > > +    int ret = 0;

> > > +

> > > +    ff_decode_frame_props(avctx, frame);

> > > +

> > > +    frame->width       = avctx->width;

> > > +    frame->height      = avctx->height;

> > > +    frame->linesize[0] = FFALIGN(avctx->width, 128);

> > > +    frame->linesize[1] = frame->linesize[0];

> > > +    frame->buf[0]      = av_buffer_pool_get(pool);

> > > +    if (!frame->buf[0])

> > > +        return AVERROR(ENOMEM);

> > > +

> > > +    frame->data[0] = frame->buf[0]->data;

> > > +    frame->data[1] = frame->data[0] +

> > > +                            frame->linesize[0] *

> > > + FFALIGN(avctx->height, 64);

> > > +

> > > +    ret = ff_attach_decode_data(frame);

> >

> > Could you please explain why need this function? I don't see

> > private_ref is needed from qsv decoding.

> 

> private_ref is required if a decoder declares the capability of

> AV_CODEC_CAP_DR1.

> https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/decode.c#L654

> 

> - Linjie


OK, it is fine to keep this for consistency, even though get_buffer() is not called in the path of ff_qsv_get_continuous_buffer().
diff mbox

Patch

diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c
index 994c9ebcb0..9e66fbc9da 100644
--- a/libavcodec/qsv.c
+++ b/libavcodec/qsv.c
@@ -412,15 +412,19 @@  static int ff_qsv_set_display_handle(AVCodecContext *avctx, QSVSession *qs)
 #endif //AVCODEC_QSV_LINUX_SESSION_HANDLE
 
 int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
-                                 const char *load_plugins)
+                                 const char *load_plugins, int gpu_copy)
 {
-    mfxIMPL impl   = MFX_IMPL_AUTO_ANY;
-    mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };
+    mfxIMPL          impl = MFX_IMPL_AUTO_ANY;
+    mfxVersion        ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };
+    mfxInitParam init_par = { MFX_IMPL_AUTO_ANY };
 
     const char *desc;
     int ret;
 
-    ret = MFXInit(impl, &ver, &qs->session);
+    init_par.GPUCopy        = gpu_copy;
+    init_par.Implementation = impl;
+    init_par.Version        = ver;
+    ret = MFXInitEx(init_par, &qs->session);
     if (ret < 0)
         return ff_qsv_print_error(avctx, ret,
                                   "Error initializing an internal MFX session");
@@ -712,7 +716,8 @@  static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl)
 }
 
 int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,
-                               AVBufferRef *device_ref, const char *load_plugins)
+                               AVBufferRef *device_ref, const char *load_plugins,
+                               int gpu_copy)
 {
     static const mfxHandleType handle_types[] = {
         MFX_HANDLE_VA_DISPLAY,
@@ -722,11 +727,12 @@  int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,
     AVHWDeviceContext    *device_ctx = (AVHWDeviceContext*)device_ref->data;
     AVQSVDeviceContext *device_hwctx = device_ctx->hwctx;
     mfxSession        parent_session = device_hwctx->session;
+    mfxInitParam            init_par = { MFX_IMPL_AUTO_ANY };
+    mfxHDL                    handle = NULL;
 
     mfxSession    session;
     mfxVersion    ver;
     mfxIMPL       impl;
-    mfxHDL        handle = NULL;
     mfxHandleType handle_type;
     mfxStatus err;
 
@@ -752,7 +758,10 @@  int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,
                "from the session\n");
     }
 
-    err = MFXInit(impl, &ver, &session);
+    init_par.GPUCopy        = gpu_copy;
+    init_par.Implementation = impl;
+    init_par.Version        = ver;
+    err = MFXInitEx(init_par, &session);
     if (err != MFX_ERR_NONE)
         return ff_qsv_print_error(avctx, err,
                                   "Error initializing a child MFX session");
@@ -783,7 +792,7 @@  int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,
 
 int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession,
                                QSVFramesContext *qsv_frames_ctx,
-                               const char *load_plugins, int opaque)
+                               const char *load_plugins, int opaque, int gpu_copy)
 {
     mfxFrameAllocator frame_allocator = {
         .pthis  = qsv_frames_ctx,
@@ -802,8 +811,12 @@  int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession,
 
     int ret;
 
+    if (gpu_copy == MFX_GPUCOPY_ON)
+        av_log(avctx, AV_LOG_WARNING, "GPU-accelerated memory copy "
+                                    "only works in MFX_IOPATTERN_OUT_SYSTEM_MEMORY.\n");
+
     ret = ff_qsv_init_session_device(avctx, &session,
-                                     frames_ctx->device_ref, load_plugins);
+                                     frames_ctx->device_ref, load_plugins, gpu_copy);
     if (ret < 0)
         return ret;
 
diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h
index 8b44a9b6f4..37559270e5 100644
--- a/libavcodec/qsv_internal.h
+++ b/libavcodec/qsv_internal.h
@@ -127,16 +127,17 @@  enum AVPictureType ff_qsv_map_pictype(int mfx_pic_type);
 enum AVFieldOrder ff_qsv_map_picstruct(int mfx_pic_struct);
 
 int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
-                                 const char *load_plugins);
+                                 const char *load_plugins, int gpu_copy);
 
 int ff_qsv_close_internal_session(QSVSession *qs);
 
 int ff_qsv_init_session_device(AVCodecContext *avctx, mfxSession *psession,
-                               AVBufferRef *device_ref, const char *load_plugins);
+                               AVBufferRef *device_ref, const char *load_plugins,
+                               int gpu_copy);
 
 int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *session,
                                QSVFramesContext *qsv_frames_ctx,
-                               const char *load_plugins, int opaque);
+                               const char *load_plugins, int opaque, int gpu_copy);
 
 int ff_qsv_find_surface_idx(QSVFramesContext *ctx, QSVFrame *frame);
 
diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index 9299596e33..a947583702 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -34,9 +34,11 @@ 
 #include "libavutil/pixdesc.h"
 #include "libavutil/pixfmt.h"
 #include "libavutil/time.h"
+#include "libavutil/imgutils.h"
 
 #include "avcodec.h"
 #include "internal.h"
+#include "decode.h"
 #include "qsv.h"
 #include "qsv_internal.h"
 #include "qsvdec.h"
@@ -54,6 +56,31 @@  const AVCodecHWConfigInternal *ff_qsv_hw_configs[] = {
     NULL
 };
 
+static int ff_qsv_get_continuous_buffer(AVCodecContext *avctx, AVFrame *frame, AVBufferPool *pool)
+{
+    int ret = 0;
+
+    ff_decode_frame_props(avctx, frame);
+
+    frame->width       = avctx->width;
+    frame->height      = avctx->height;
+    frame->linesize[0] = FFALIGN(avctx->width, 128);
+    frame->linesize[1] = frame->linesize[0];
+    frame->buf[0]      = av_buffer_pool_get(pool);
+    if (!frame->buf[0])
+        return AVERROR(ENOMEM);
+
+    frame->data[0] = frame->buf[0]->data;
+    frame->data[1] = frame->data[0] +
+                            frame->linesize[0] * FFALIGN(avctx->height, 64);
+
+    ret = ff_attach_decode_data(frame);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
 static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession session,
                             AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref)
 {
@@ -74,7 +101,8 @@  static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses
 
         ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session,
                                          &q->frames_ctx, q->load_plugins,
-                                         q->iopattern == MFX_IOPATTERN_OUT_OPAQUE_MEMORY);
+                                         q->iopattern == MFX_IOPATTERN_OUT_OPAQUE_MEMORY,
+                                         q->gpu_copy);
         if (ret < 0) {
             av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
             return ret;
@@ -88,7 +116,7 @@  static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses
         }
 
         ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session,
-                                         hw_device_ref, q->load_plugins);
+                                         hw_device_ref, q->load_plugins, q->gpu_copy);
         if (ret < 0)
             return ret;
 
@@ -96,7 +124,7 @@  static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses
     } else {
         if (!q->internal_qs.session) {
             ret = ff_qsv_init_internal_session(avctx, &q->internal_qs,
-                                               q->load_plugins);
+                                               q->load_plugins, q->gpu_copy);
             if (ret < 0)
                 return ret;
         }
@@ -229,6 +257,9 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, mfxVideoParam *
 
     q->frame_info = param->mfx.FrameInfo;
 
+    if (!avctx->hw_frames_ctx)
+        q->pool = av_buffer_pool_init(av_image_get_buffer_size(avctx->pix_fmt,
+                    FFALIGN(avctx->width, 128), FFALIGN(avctx->height, 64), 1), av_buffer_allocz);
     return 0;
 }
 
@@ -275,9 +306,15 @@  static int alloc_frame(AVCodecContext *avctx, QSVContext *q, QSVFrame *frame)
 {
     int ret;
 
-    ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);
-    if (ret < 0)
-        return ret;
+    if (!q->pool) {
+        ret = ff_get_buffer(avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);
+        if (ret < 0)
+            return ret;
+    } else {
+        ret = ff_qsv_get_continuous_buffer(avctx, frame->frame, q->pool);
+        if (ret < 0)
+            return ret;
+    }
 
     if (frame->frame->format == AV_PIX_FMT_QSV) {
         frame->surface = *(mfxFrameSurface1*)frame->frame->data[3];
@@ -535,6 +572,7 @@  int ff_qsv_decode_close(QSVContext *q)
 
     av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
     av_buffer_unref(&q->frames_ctx.mids_buf);
+    av_buffer_pool_uninit(&q->pool);
 
     return 0;
 }
diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
index 64dc8d2f47..dec1f61ceb 100644
--- a/libavcodec/qsvdec.h
+++ b/libavcodec/qsvdec.h
@@ -59,12 +59,14 @@  typedef struct QSVContext {
     enum AVPixelFormat orig_pix_fmt;
     uint32_t fourcc;
     mfxFrameInfo frame_info;
+    AVBufferPool *pool;
 
     int initialized;
 
     // options set by the caller
     int async_depth;
     int iopattern;
+    int gpu_copy;
 
     char *load_plugins;
 
diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
index eb1dc336a4..d7ac00689a 100644
--- a/libavcodec/qsvdec_h2645.c
+++ b/libavcodec/qsvdec_h2645.c
@@ -193,6 +193,11 @@  static const AVOption hevc_options[] = {
 
     { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load in an internal session",
         OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD },
+
+    { "gpu_copy", "A GPU-accelerated memory copy between video and system memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"},
+        { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
+        { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },      0, 0, VD, "gpu_copy"},
+        { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },     0, 0, VD, "gpu_copy"},
     { NULL },
 };
 
@@ -228,6 +233,11 @@  AVCodec ff_hevc_qsv_decoder = {
 #if CONFIG_H264_QSV_DECODER
 static const AVOption options[] = {
     { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
+    /* gpu_copy takes an MFX_GPUCOPY_* value; the named constants below share its "gpu_copy" unit. */
+    { "gpu_copy", "A GPU-accelerated memory copy between video and system memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"},
+    { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
+    { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },      0, 0, VD, "gpu_copy"},
+    { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },     0, 0, VD, "gpu_copy"},
     { NULL },
 };
 
diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
index b044c11540..13b1a99113 100644
--- a/libavcodec/qsvdec_other.c
+++ b/libavcodec/qsvdec_other.c
@@ -181,6 +181,11 @@  static void qsv_decode_flush(AVCodecContext *avctx)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
+    /* gpu_copy takes an MFX_GPUCOPY_* value; the named constants below share its "gpu_copy" unit. */
+    { "gpu_copy", "A GPU-accelerated memory copy between video and system memory", OFFSET(qsv.gpu_copy), AV_OPT_TYPE_INT, { .i64 = MFX_GPUCOPY_DEFAULT }, MFX_GPUCOPY_DEFAULT, MFX_GPUCOPY_OFF, VD, "gpu_copy"},
+    { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_DEFAULT }, 0, 0, VD, "gpu_copy"},
+    { "on",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_ON },      0, 0, VD, "gpu_copy"},
+    { "off",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_GPUCOPY_OFF },     0, 0, VD, "gpu_copy"},
     { NULL },
 };
 
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 207cdc1d61..ba85d645ca 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -956,7 +956,8 @@  static int qsvenc_init_session(AVCodecContext *avctx, QSVEncContext *q)
 
         ret = ff_qsv_init_session_frames(avctx, &q->internal_qs.session,
                                          &q->frames_ctx, q->load_plugins,
-                                         q->param.IOPattern == MFX_IOPATTERN_IN_OPAQUE_MEMORY);
+                                         q->param.IOPattern == MFX_IOPATTERN_IN_OPAQUE_MEMORY,
+                                         MFX_GPUCOPY_OFF);
         if (ret < 0) {
             av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
             return ret;
@@ -965,14 +966,15 @@  static int qsvenc_init_session(AVCodecContext *avctx, QSVEncContext *q)
         q->session = q->internal_qs.session;
     } else if (avctx->hw_device_ctx) {
         ret = ff_qsv_init_session_device(avctx, &q->internal_qs.session,
-                                         avctx->hw_device_ctx, q->load_plugins);
+                                         avctx->hw_device_ctx, q->load_plugins,
+                                         MFX_GPUCOPY_OFF);
         if (ret < 0)
             return ret;
 
         q->session = q->internal_qs.session;
     } else {
         ret = ff_qsv_init_internal_session(avctx, &q->internal_qs,
-                                           q->load_plugins);
+                                           q->load_plugins, MFX_GPUCOPY_OFF);
         if (ret < 0)
             return ret;