diff mbox

[FFmpeg-devel,v3,3/6] lavc/qsvdec: Replace current parser with MFXVideoDECODE_DecodeHeader()

Message ID 20190308074028.13474-4-zhong.li@intel.com
State New
Headers show

Commit Message

Zhong Li March 8, 2019, 7:40 a.m. UTC
Using MSDK parser can improve qsv decoder pass rate in some cases (E.g:
sps declares a wrong level_idc, smaller than it should be).
And it is necessary for adding new qsv decoders such as MJPEG and VP9
since current parser can't provide enough information.
Actually using MFXVideoDECODE_DecodeHeader() was discussed at
https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and merged as commit 1acb19d,
but was overwritten when the libav patches were merged (commit: 1f26a23) without any explanation.

v2: split decode header from decode_init, and call it for every frame to
detect format/resolution change. It can fix some regression issues such
as hevc 10bits decoding.

Signed-off-by: Zhong Li <zhong.li@intel.com>
---
 libavcodec/qsvdec.c       | 172 ++++++++++++++++++++------------------
 libavcodec/qsvdec.h       |   2 +
 libavcodec/qsvdec_h2645.c |   1 +
 libavcodec/qsvdec_other.c |   1 +
 4 files changed, 93 insertions(+), 83 deletions(-)

Comments

Rogozhkin, Dmitry V March 9, 2019, 12:48 a.m. UTC | #1
On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:
> Using MSDK parser can improve qsv decoder pass rate in some cases

> (E.g:

> sps declares a wrong level_idc, smaller than it should be).

> And it is necessary for adding new qsv decoders such as MJPEG and VP9

> since current parser can't provide enough information.

> Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> merged as commit 1acb19d,

> but was overwritten when merged libav patches (commit: 1f26a23)

> without any explain.

> 

> v2: split decode header from decode_init, and call it for everyframe

> to

> detect format/resoultion change. It can fix some regression issues

> such

> as hevc 10bits decoding.

> 

> Signed-off-by: Zhong Li <zhong.li@intel.com>

> ---

>  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++--------------

> ----

>  libavcodec/qsvdec.h       |   2 +

>  libavcodec/qsvdec_h2645.c |   1 +

>  libavcodec/qsvdec_other.c |   1 +

>  4 files changed, 93 insertions(+), 83 deletions(-)

> 

> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c

> index 4a0be811fb..b78026e14d 100644

> --- a/libavcodec/qsvdec.c

> +++ b/libavcodec/qsvdec.c

> @@ -120,19 +120,17 @@ static inline unsigned int qsv_fifo_size(const

> AVFifoBuffer* fifo)

>      return av_fifo_size(fifo) / qsv_fifo_item_size();

>  }

>  

> -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q,

> enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

>  {

> -    const AVPixFmtDescriptor *desc;

>      mfxSession session = NULL;

>      int iopattern = 0;

> -    mfxVideoParam param = { 0 };

> -    int frame_width  = avctx->coded_width;

> -    int frame_height = avctx->coded_height;

>      int ret;

>  

> -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> -    if (!desc)

> -        return AVERROR_BUG;

> +    ret = ff_get_format(avctx, pix_fmts);

> +    if (ret < 0) {

> +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> +        return ret;

> +    }

>  

>      if (!q->async_fifo) {

>          q->async_fifo = av_fifo_alloc(q->async_depth *

> qsv_fifo_item_size());

> @@ -170,48 +168,72 @@ static int qsv_decode_init(AVCodecContext

> *avctx, QSVContext *q)

>          return ret;

>      }

>  

> -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> -    if (ret < 0)

> -        return ret;

> +    param->IOPattern   = q->iopattern;

> +    param->AsyncDepth  = q->async_depth;

> +    param->ExtParam    = q->ext_buffers;

> +    param->NumExtParam = q->nb_ext_buffers;

>  

> -    param.mfx.CodecId      = ret;

> -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id,

> avctx->profile);

> -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ?

> MFX_LEVEL_UNKNOWN : avctx->level;

> -

> -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;

> -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> -    param.mfx.FrameInfo.Width          = frame_width;

> -    param.mfx.FrameInfo.Height         = frame_height;

> -    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;

> -

> -    switch (avctx->field_order) {

> -    case AV_FIELD_PROGRESSIVE:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;

> -        break;

> -    case AV_FIELD_TT:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;

> -        break;

> -    case AV_FIELD_BB:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;

> -        break;

> -    default:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;

> -        break;

> -    }

> +    return 0;

> + }

>  

> -    param.IOPattern   = q->iopattern;

> -    param.AsyncDepth  = q->async_depth;

> -    param.ExtParam    = q->ext_buffers;

> -    param.NumExtParam = q->nb_ext_buffers;

> +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> mfxVideoParam *param)

> +{

> +    int ret;

>  

> -    ret = MFXVideoDECODE_Init(q->session, &param);

> +    avctx->width        = param->mfx.FrameInfo.CropW;

> +    avctx->height       = param->mfx.FrameInfo.CropH;

> +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> +    avctx->coded_height = param->mfx.FrameInfo.Height;

> +    avctx->level        = param->mfx.CodecLevel;

> +    avctx->profile      = param->mfx.CodecProfile;

> +    avctx->field_order  = ff_qsv_map_picstruct(param-

> >mfx.FrameInfo.PicStruct);

> +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> >mfx.FrameInfo.FourCC);

> +

> +    ret = MFXVideoDECODE_Init(q->session, param);

>      if (ret < 0)

>          return ff_qsv_print_error(avctx, ret,

>                                    "Error initializing the MFX video

> decoder");

>  

> -    q->frame_info = param.mfx.FrameInfo;

> +    q->frame_info = param->mfx.FrameInfo;

> +

> +    return 0;

> +}

> +

> +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q,

> AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> +{

> +    int ret;

> +

> +    mfxBitstream bs = { { { 0 } } };

> +

> +    if (avpkt->size) {

> +        bs.Data       = avpkt->data;

> +        bs.DataLength = avpkt->size;

> +        bs.MaxLength  = bs.DataLength;

> +        bs.TimeStamp  = avpkt->pts;

> +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;

> +    } else

> +        return AVERROR_INVALIDDATA;

> +

> +

> +    if(!q->session) {

> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> +        if (ret < 0)

> +            return ret;

> +    }

> +

> +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> +    if (ret < 0)

> +        return ret;

> +

> +    param->mfx.CodecId = ret;

> +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> +    if (MFX_ERR_MORE_DATA == ret) {

> +       return AVERROR(EAGAIN);

> +    }

> +    if (ret < 0)

> +        return ff_qsv_print_error(avctx, ret,

> +                "Error decoding stream header");

>  

>      return 0;

>  }

> @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>      uint8_t *dummy_data;

>      int dummy_size;

>      int ret;

> -    const AVPixFmtDescriptor *desc;

> +    mfxVideoParam param = { 0 };

> +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> +                                       AV_PIX_FMT_NV12,

> +                                       AV_PIX_FMT_NONE };

>  

>      if (!q->avctx_internal) {

>          q->avctx_internal = avcodec_alloc_context3(NULL);

> @@ -508,7 +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>              return AVERROR(ENOMEM);

>  

>          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

>      }

>  

>      if (!pkt->size)

> @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>                       pkt->data, pkt->size, pkt->pts, pkt->dts,

>                       pkt->pos);

>  

> -    avctx->field_order  = q->parser->field_order;

>      /* TODO: flush delayed frames on reinit */

> -    if (q->parser->format       != q->orig_pix_fmt    ||

> -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> >coded_width, 16) ||

> -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> >coded_height, 16)) {

> -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> -                                           AV_PIX_FMT_NONE,

> -                                           AV_PIX_FMT_NONE };

> -        enum AVPixelFormat qsv_format;

> +

> +    // sw_pix_fmt was initialized as NV12, will be updated after

> header decoded if not true.

> +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> +        pix_fmts[1] = q->orig_pix_fmt;

Well, it still seems non self-explanatory to me why we have pix_fmts[1]
here.

> +

> +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

Parsing can be expensive to perform on each frame. See more below.

> +

> +    if (ret >= 0 && (q->orig_pix_fmt !=

> ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

>          AVPacket zero_pkt = {0};

You are actually trying to perform the task which is being done by
mediasdk. Moreover, you don't detect all the cases when the mediasdk
decoder will require a reset. For example, you may have 2 streams
concatenated which have perfectly the same resolution, but decoder
reset will still be required because of profile/level/gop structure
changes, and memory reallocation — or, better said, additional
allocation — could also be required because mediasdk may require a
different (bigger or smaller) number of surfaces to properly handle
the new bitstream. See more below.

>  

>          if (q->buffered_count) {

> @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>              q->buffered_count--;

>              return qsv_decode(avctx, q, frame, got_frame,

> &zero_pkt);

>          }

> -

>          q->reinit_flag = 0;

>  

> -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> >fourcc);

> -        if (qsv_format < 0) {

> -            av_log(avctx, AV_LOG_ERROR,

> -                   "Decoding pixel format '%s' is not supported\n",

> -                   av_get_pix_fmt_name(q->parser->format));

> -            ret = AVERROR(ENOSYS);

> -            goto reinit_fail;

> -        }

> +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] =

> ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

>  

> -        q->orig_pix_fmt     = q->parser->format;

> -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> -        avctx->width        = q->parser->width;

> -        avctx->height       = q->parser->height;

> -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);

> -        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);

> -        avctx->level        = q->avctx_internal->level;

> -        avctx->profile      = q->avctx_internal->profile;

> +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> +        avctx->coded_height = param.mfx.FrameInfo.Height;

>  

> -        ret = ff_get_format(avctx, pix_fmts);

> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

>          if (ret < 0)

>              goto reinit_fail;

> +        q->initialized = 0;

> +    }

>  

> -        avctx->pix_fmt = ret;

> -

> -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> -        if (!desc)

> -            goto reinit_fail;

> -

> -         if (desc->comp[0].depth > 8) {

> -            avctx->coded_width =  FFALIGN(q->parser->coded_width,

> 32);

> -            avctx->coded_height = FFALIGN(q->parser->coded_height,

> 32);

> -        }

> -

> -        ret = qsv_decode_init(avctx, q);

> +    if (!q->initialized) {

> +        ret = qsv_decode_init(avctx, q, &param);

>          if (ret < 0)

>              goto reinit_fail;

> +        q->initialized = 1;

>      }

>  

>      return qsv_decode(avctx, q, frame, got_frame, pkt);


So, from my perspective you should not attempt to call decode header on
each incoming data portion and try to interpret results. Instead you
should respect what mediasdk DecodeFrameAsync is returning. Basically
it will return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM if decoder reset is
required. This gives you 2 situations when you should call
DecodeHeader:
1. You either call it if decoder is not initialized at all
2. Or you call it upon receiving MFX_ERR_INCOMPATIBLE_VIDEO_PARAM

Once you received MFX_ERR_INCOMPATIBLE_VIDEO_PARAM step to perform are:
1. Flush cached frames from the mediasdk decoder by passing a nullptr bitstream
to DecodeFrameAsync
2. Run decode header and get updated parameters
3. Check (via QueryIOSurf) and potentially reallocate surfaces to
satisfy new bitstream requirement
4. Reset decoder with new parameters

Application may attempt to optimize procedure on step 3 above and avoid
surfaces reallocation if possible. For example, if you have allocated
1920x1080 surfaces and resolution changed to 720x480, you may actually
stay on 1920x1080 if you are ok to use more memory hoping to receive
1920x1080 stream in the future again. Eventually this would work if you
have enough number of 1920x1080 surfaces.

> @@ -589,4 +594,5 @@ reinit_fail:

>  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)

>  {

>      q->orig_pix_fmt = AV_PIX_FMT_NONE;

> +    q->initialized = 0;

>  }

> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h

> index 111536caba..4812fb2a6b 100644

> --- a/libavcodec/qsvdec.h

> +++ b/libavcodec/qsvdec.h

> @@ -63,6 +63,8 @@ typedef struct QSVContext {

>      uint32_t fourcc;

>      mfxFrameInfo frame_info;

>  

> +    int initialized;

> +

>      // options set by the caller

>      int async_depth;

>      int iopattern;

> diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c

> index 9b49f5506e..eb1dc336a4 100644

> --- a/libavcodec/qsvdec_h2645.c

> +++ b/libavcodec/qsvdec_h2645.c

> @@ -103,6 +103,7 @@ static av_cold int qsv_decode_init(AVCodecContext

> *avctx)

>          }

>      }

>  

> +    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;

>      s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));

>      if (!s->packet_fifo) {

>          ret = AVERROR(ENOMEM);

> diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c

> index 03251d2c85..a6f1b88ca0 100644

> --- a/libavcodec/qsvdec_other.c

> +++ b/libavcodec/qsvdec_other.c

> @@ -90,6 +90,7 @@ static av_cold int qsv_decode_init(AVCodecContext

> *avctx)

>      }

>  #endif

>  

> +    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;

>      s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));

>      if (!s->packet_fifo) {

>          ret = AVERROR(ENOMEM);
Zhong Li March 11, 2019, 9:23 a.m. UTC | #2
> -----Original Message-----

> From: Rogozhkin, Dmitry V

> Sent: Saturday, March 9, 2019 8:48 AM

> To: ffmpeg-devel@ffmpeg.org

> Cc: Li, Zhong <zhong.li@intel.com>

> Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace current

> parser with MFXVideoDECODE_DecodeHeader()

> 

> On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > Using MSDK parser can improve qsv decoder pass rate in some cases

> > (E.g:

> > sps declares a wrong level_idc, smaller than it should be).

> > And it is necessary for adding new qsv decoders such as MJPEG and VP9

> > since current parser can't provide enough information.

> > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> > merged as commit 1acb19d, but was overwritten when merged libav

> > patches (commit: 1f26a23) without any explain.

> >

> > v2: split decode header from decode_init, and call it for everyframe

> > to detect format/resoultion change. It can fix some regression issues

> > such as hevc 10bits decoding.

> >

> > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > ---

> >  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++--------------

> > ----

> >  libavcodec/qsvdec.h       |   2 +

> >  libavcodec/qsvdec_h2645.c |   1 +

> >  libavcodec/qsvdec_other.c |   1 +

> >  4 files changed, 93 insertions(+), 83 deletions(-)

> >

> > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > 4a0be811fb..b78026e14d 100644

> > --- a/libavcodec/qsvdec.c

> > +++ b/libavcodec/qsvdec.c

> > @@ -120,19 +120,17 @@ static inline unsigned int qsv_fifo_size(const

> > AVFifoBuffer* fifo)

> >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> >  }

> >

> > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q,

> > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> >  {

> > -    const AVPixFmtDescriptor *desc;

> >      mfxSession session = NULL;

> >      int iopattern = 0;

> > -    mfxVideoParam param = { 0 };

> > -    int frame_width  = avctx->coded_width;

> > -    int frame_height = avctx->coded_height;

> >      int ret;

> >

> > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > -    if (!desc)

> > -        return AVERROR_BUG;

> > +    ret = ff_get_format(avctx, pix_fmts);

> > +    if (ret < 0) {

> > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > +        return ret;

> > +    }

> >

> >      if (!q->async_fifo) {

> >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> >          return ret;

> >      }

> >

> > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > -    if (ret < 0)

> > -        return ret;

> > +    param->IOPattern   = q->iopattern;

> > +    param->AsyncDepth  = q->async_depth;

> > +    param->ExtParam    = q->ext_buffers;

> > +    param->NumExtParam = q->nb_ext_buffers;

> >

> > -    param.mfx.CodecId      = ret;

> > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id,

> > avctx->profile);

> > -    param.mfx.CodecLevel   = avctx->level ==

> FF_LEVEL_UNKNOWN ?

> > MFX_LEVEL_UNKNOWN : avctx->level;

> > -

> > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth >

> 8;

> > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > -    param.mfx.FrameInfo.Width          = frame_width;

> > -    param.mfx.FrameInfo.Height         = frame_height;

> > -    param.mfx.FrameInfo.ChromaFormat   =

> MFX_CHROMAFORMAT_YUV420;

> > -

> > -    switch (avctx->field_order) {

> > -    case AV_FIELD_PROGRESSIVE:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_PROGRESSIVE;

> > -        break;

> > -    case AV_FIELD_TT:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_FIELD_TFF;

> > -        break;

> > -    case AV_FIELD_BB:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_FIELD_BFF;

> > -        break;

> > -    default:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_UNKNOWN;

> > -        break;

> > -    }

> > +    return 0;

> > + }

> >

> > -    param.IOPattern   = q->iopattern;

> > -    param.AsyncDepth  = q->async_depth;

> > -    param.ExtParam    = q->ext_buffers;

> > -    param.NumExtParam = q->nb_ext_buffers;

> > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > mfxVideoParam *param)

> > +{

> > +    int ret;

> >

> > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > +    avctx->level        = param->mfx.CodecLevel;

> > +    avctx->profile      = param->mfx.CodecProfile;

> > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > >mfx.FrameInfo.PicStruct);

> > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > >mfx.FrameInfo.FourCC);

> > +

> > +    ret = MFXVideoDECODE_Init(q->session, param);

> >      if (ret < 0)

> >          return ff_qsv_print_error(avctx, ret,

> >                                    "Error initializing the

> MFX video

> > decoder");

> >

> > -    q->frame_info = param.mfx.FrameInfo;

> > +    q->frame_info = param->mfx.FrameInfo;

> > +

> > +    return 0;

> > +}

> > +

> > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q,

> > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > +{

> > +    int ret;

> > +

> > +    mfxBitstream bs = { { { 0 } } };

> > +

> > +    if (avpkt->size) {

> > +        bs.Data       = avpkt->data;

> > +        bs.DataLength = avpkt->size;

> > +        bs.MaxLength  = bs.DataLength;

> > +        bs.TimeStamp  = avpkt->pts;

> > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > +            bs.DataFlag   |=

> MFX_BITSTREAM_COMPLETE_FRAME;

> > +    } else

> > +        return AVERROR_INVALIDDATA;

> > +

> > +

> > +    if(!q->session) {

> > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > +        if (ret < 0)

> > +            return ret;

> > +    }

> > +

> > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > +    if (ret < 0)

> > +        return ret;

> > +

> > +    param->mfx.CodecId = ret;

> > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> > +    if (MFX_ERR_MORE_DATA == ret) {

> > +       return AVERROR(EAGAIN);

> > +    }

> > +    if (ret < 0)

> > +        return ff_qsv_print_error(avctx, ret,

> > +                "Error decoding stream header");

> >

> >      return 0;

> >  }

> > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > QSVContext *q,

> >      uint8_t *dummy_data;

> >      int dummy_size;

> >      int ret;

> > -    const AVPixFmtDescriptor *desc;

> > +    mfxVideoParam param = { 0 };

> > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> >

> +                                       AV_PIX_FMT_NV

> 12,

> >

> +                                       AV_PIX_FMT_N

> ONE };

> >

> >      if (!q->avctx_internal) {

> >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> -508,7

> > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext

> > *q,

> >              return AVERROR(ENOMEM);

> >

> >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> >      }

> >

> >      if (!pkt->size)

> > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> *avctx,

> > QSVContext *q,

> >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> >                       pkt->pos);

> >

> > -    avctx->field_order  = q->parser->field_order;

> >      /* TODO: flush delayed frames on reinit */

> > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> > >coded_width, 16) ||

> > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > >coded_height, 16)) {

> > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> >

> -                                           AV_PIX_F

> MT_NONE,

> >

> -                                           AV_PIX_F

> MT_NONE };

> > -        enum AVPixelFormat qsv_format;

> > +

> > +    // sw_pix_fmt was initialized as NV12, will be updated after

> > header decoded if not true.

> > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > +        pix_fmts[1] = q->orig_pix_fmt;

> Well, it still seems non self-explanatory to me why we have pix_fmts[1] here.


I think it has been explained and described in my comment. It means sw_pix_fmt.
For more detail, probably you want to check the implementation of ff_get_format().

> > +

> > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> Parsing can be expensive to perform on each frame. See more below.


1. Parsing just reads header bits; I don't think it is a CPU-heavy calculation task compared with slice-level decoding.
2. Mostly only key frames have a complete header. For non-key frames, it just returns MFX_ERR_MORE_DATA instead of doing real parsing work.

> > +

> > +    if (ret >= 0 && (q->orig_pix_fmt !=

> > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> > +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> > +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

> >          AVPacket zero_pkt = {0};

> You are actually trying to perform the task which is being done by mediasdk.

> Moreover, you don't detect all the cases when mediasdk decoder will require

> to be reset. For example, you may have 2 streams concatenated which have

> perfectly the same resolution, but still decoder reset will be required because

> of profile/level/gop structure changes and memory reallocation or better to

> say additional allocation could also be required because mediasdk may

> require different (bigger or lesser) number of surfaces to properly handle

> new bitstream. See more below.


Which case of GOP structure change needs a reset? Could you please give an example.

> >

> >          if (q->buffered_count) {

> > @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext

> *avctx,

> > QSVContext *q,

> >              q->buffered_count--;

> >              return qsv_decode(avctx, q, frame, got_frame,

> &zero_pkt);

> >          }

> > -

> >          q->reinit_flag = 0;

> >

> > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> > >fourcc);

> > -        if (qsv_format < 0) {

> > -            av_log(avctx, AV_LOG_ERROR,

> > -                   "Decoding pixel format '%s' is not

> supported\n",

> > -                   av_get_pix_fmt_name(q->parser->format));

> > -            ret = AVERROR(ENOSYS);

> > -            goto reinit_fail;

> > -        }

> > +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] =

> > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> >

> > -        q->orig_pix_fmt     = q->parser->format;

> > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > -        avctx->width        = q->parser->width;

> > -        avctx->height       = q->parser->height;

> > -        avctx->coded_width  = FFALIGN(q->parser->coded_width,

> 16);

> > -        avctx->coded_height = FFALIGN(q->parser->coded_height,

> 16);

> > -        avctx->level        = q->avctx_internal->level;

> > -        avctx->profile      = q->avctx_internal->profile;

> > +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> > +        avctx->coded_height = param.mfx.FrameInfo.Height;

> >

> > -        ret = ff_get_format(avctx, pix_fmts);

> > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

> >          if (ret < 0)

> >              goto reinit_fail;

> > +        q->initialized = 0;

> > +    }

> >

> > -        avctx->pix_fmt = ret;

> > -

> > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > -        if (!desc)

> > -            goto reinit_fail;

> > -

> > -         if (desc->comp[0].depth > 8) {

> > -            avctx->coded_width

> =  FFALIGN(q->parser->coded_width,

> > 32);

> > -            avctx->coded_height =

> FFALIGN(q->parser->coded_height,

> > 32);

> > -        }

> > -

> > -        ret = qsv_decode_init(avctx, q);

> > +    if (!q->initialized) {

> > +        ret = qsv_decode_init(avctx, q, &param);

> >          if (ret < 0)

> >              goto reinit_fail;

> > +        q->initialized = 1;

> >      }

> >

> >      return qsv_decode(avctx, q, frame, got_frame, pkt);

> 

> So, from my perspective you should not attempt to call decode header on

> each incoming data portion and try to interpret results. Instead you should

> respect what mediasdk DecodeFrameAsync is returning. Basically it will

> return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM is decoder reset is required.

> This gives you 2 situations when you should call

> DecodeHeader:

> 1. You either call it if decoder is not initalized at all 2. Or you call it upon

> receiving MFX_ERR_INCOMPATIBLE_VIDEO_PARAM

> 

> Once you received MFX_ERR_INCOMPATIBLE_VIDEO_PARAM step to

> perform are:

> 1. Flush cached frames from medisdk decoder passing nullptr bitstream to

> DecodeFrameAsync 2. Run decode header and get updated parameters 3.

> Check (via QueryIOSurf) and potentially reallocate surfaces to satisfy new

> bitstream requirement 4. Reset decoder with new parameters


1. This patch just replaces the original parser with the MSDK parser. I don't want to mix other things in here (that would probably introduce new bugs, whereas full round testing has been done for this patch).
  If it is really necessary, it should be done in an additional patch.
2. Unfortunately, if we don't reset and set the correct resolution/pix_fmt, FFmpeg-qsv already fails in the process of MSDK initialization (e.g: passing NV12 format when actually it is P010. See: https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsv.c#L424 )
  There is no opportunity to get the status of DecodeFrameAsync in these cases. I haven't compared the MSDK sample decoder with current FFmpeg (will take a look later). But IIRC, the MSDK sample decoder isn't doing a better job than ffmpeg-qsv for the reinit case.
  (one example: it hangs when running sample_decode h264 -i /fate-suite/h264/reinit-large_420_8-to-small_420_8.h264 -r , and this clip changes from a larger resolution to a smaller one. I wonder what will happen if the resolution changes from smaller to larger, e.g. 720x480 to 1920x1080).
3. IMHO, reinit work is better done before decoding, instead of decoding and then recovering after it has failed.
4. This is a common function for all decoders; I would like to see it under the control of ffmpeg. MSDK may have no unified implementation across all codecs ( e.g: https://github.com/Intel-Media-SDK/MediaSDK/issues/333 ).
  Your suggestion may be addressed when I see any new issues or bugs. But currently I can't see the necessity.

> Application may attempt to optimize procedure on step 3 above and avoid

> surfaces reallocation if possible. For example, if you have allocated

> 1920x1080 surfaces and resolution changed to 720x480, you may actually

> stay on 1920x1080 if you are ok to use more memory hoping to receive

> 1920x1080 stream in the future again. Eventually this would work if you have

> enough number of 1920x1080 surfaces.


Hm, this is a simpler topic; https://patchwork.ffmpeg.org/patch/12257/ prepares for this, though the decoder should be updated too. But as per the previous discussion, that should be a separate patch and needs pre-testing.
Rogozhkin, Dmitry V March 11, 2019, 11:37 p.m. UTC | #3
On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:
> > -----Original Message-----

> > From: Rogozhkin, Dmitry V

> > Sent: Saturday, March 9, 2019 8:48 AM

> > To: ffmpeg-devel@ffmpeg.org

> > Cc: Li, Zhong <zhong.li@intel.com>

> > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > current

> > parser with MFXVideoDECODE_DecodeHeader()

> > 

> > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > Using MSDK parser can improve qsv decoder pass rate in some cases

> > > (E.g:

> > > sps declares a wrong level_idc, smaller than it should be).

> > > And it is necessary for adding new qsv decoders such as MJPEG and

> > > VP9

> > > since current parser can't provide enough information.

> > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html

> > > and

> > > merged as commit 1acb19d, but was overwritten when merged libav

> > > patches (commit: 1f26a23) without any explain.

> > > 

> > > v2: split decode header from decode_init, and call it for

> > > everyframe

> > > to detect format/resoultion change. It can fix some regression

> > > issues

> > > such as hevc 10bits decoding.

> > > 

> > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > ---

> > >  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++----------

> > > ----

> > > ----

> > >  libavcodec/qsvdec.h       |   2 +

> > >  libavcodec/qsvdec_h2645.c |   1 +

> > >  libavcodec/qsvdec_other.c |   1 +

> > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > 

> > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > 4a0be811fb..b78026e14d 100644

> > > --- a/libavcodec/qsvdec.c

> > > +++ b/libavcodec/qsvdec.c

> > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > qsv_fifo_size(const

> > > AVFifoBuffer* fifo)

> > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > >  }

> > > 

> > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext

> > > *q,

> > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > >  {

> > > -    const AVPixFmtDescriptor *desc;

> > >      mfxSession session = NULL;

> > >      int iopattern = 0;

> > > -    mfxVideoParam param = { 0 };

> > > -    int frame_width  = avctx->coded_width;

> > > -    int frame_height = avctx->coded_height;

> > >      int ret;

> > > 

> > > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > > -    if (!desc)

> > > -        return AVERROR_BUG;

> > > +    ret = ff_get_format(avctx, pix_fmts);

> > > +    if (ret < 0) {

> > > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > > +        return ret;

> > > +    }

> > > 

> > >      if (!q->async_fifo) {

> > >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > >          return ret;

> > >      }

> > > 

> > > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > -    if (ret < 0)

> > > -        return ret;

> > > +    param->IOPattern   = q->iopattern;

> > > +    param->AsyncDepth  = q->async_depth;

> > > +    param->ExtParam    = q->ext_buffers;

> > > +    param->NumExtParam = q->nb_ext_buffers;

> > > 

> > > -    param.mfx.CodecId      = ret;

> > > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > > >codec_id,

> > > avctx->profile);

> > > -    param.mfx.CodecLevel   = avctx->level ==

> > 

> > FF_LEVEL_UNKNOWN ?

> > > MFX_LEVEL_UNKNOWN : avctx->level;

> > > -

> > > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth >

> > 

> > 8;

> > > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > > -    param.mfx.FrameInfo.Width          = frame_width;

> > > -    param.mfx.FrameInfo.Height         = frame_height;

> > > -    param.mfx.FrameInfo.ChromaFormat   =

> > 

> > MFX_CHROMAFORMAT_YUV420;

> > > -

> > > -    switch (avctx->field_order) {

> > > -    case AV_FIELD_PROGRESSIVE:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_PROGRESSIVE;

> > > -        break;

> > > -    case AV_FIELD_TT:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_FIELD_TFF;

> > > -        break;

> > > -    case AV_FIELD_BB:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_FIELD_BFF;

> > > -        break;

> > > -    default:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_UNKNOWN;

> > > -        break;

> > > -    }

> > > +    return 0;

> > > + }

> > > 

> > > -    param.IOPattern   = q->iopattern;

> > > -    param.AsyncDepth  = q->async_depth;

> > > -    param.ExtParam    = q->ext_buffers;

> > > -    param.NumExtParam = q->nb_ext_buffers;

> > > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > > mfxVideoParam *param)

> > > +{

> > > +    int ret;

> > > 

> > > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > > +    avctx->level        = param->mfx.CodecLevel;

> > > +    avctx->profile      = param->mfx.CodecProfile;

> > > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > > > mfx.FrameInfo.PicStruct);

> > > 

> > > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > > > mfx.FrameInfo.FourCC);

> > > 

> > > +

> > > +    ret = MFXVideoDECODE_Init(q->session, param);

> > >      if (ret < 0)

> > >          return ff_qsv_print_error(avctx, ret,

> > >                                    "Error initializing the

> > 

> > MFX video

> > > decoder");

> > > 

> > > -    q->frame_info = param.mfx.FrameInfo;

> > > +    q->frame_info = param->mfx.FrameInfo;

> > > +

> > > +    return 0;

> > > +}

> > > +

> > > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext

> > > *q,

> > > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> > > *param)

> > > +{

> > > +    int ret;

> > > +

> > > +    mfxBitstream bs = { { { 0 } } };

> > > +

> > > +    if (avpkt->size) {

> > > +        bs.Data       = avpkt->data;

> > > +        bs.DataLength = avpkt->size;

> > > +        bs.MaxLength  = bs.DataLength;

> > > +        bs.TimeStamp  = avpkt->pts;

> > > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > > +            bs.DataFlag   |=

> > 

> > MFX_BITSTREAM_COMPLETE_FRAME;

> > > +    } else

> > > +        return AVERROR_INVALIDDATA;

> > > +

> > > +

> > > +    if(!q->session) {

> > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > > +        if (ret < 0)

> > > +            return ret;

> > > +    }

> > > +

> > > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > +    if (ret < 0)

> > > +        return ret;

> > > +

> > > +    param->mfx.CodecId = ret;

> > > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> > > +    if (MFX_ERR_MORE_DATA == ret) {

> > > +       return AVERROR(EAGAIN);

> > > +    }

> > > +    if (ret < 0)

> > > +        return ff_qsv_print_error(avctx, ret,

> > > +                "Error decoding stream header");

> > > 

> > >      return 0;

> > >  }

> > > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext

> > > *avctx,

> > > QSVContext *q,

> > >      uint8_t *dummy_data;

> > >      int dummy_size;

> > >      int ret;

> > > -    const AVPixFmtDescriptor *desc;

> > > +    mfxVideoParam param = { 0 };

> > > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > 

> > 

> > +                                       AV_PIX_FMT_NV

> > 12,

> > > 

> > 

> > +                                       AV_PIX_FMT_N

> > ONE };

> > > 

> > >      if (!q->avctx_internal) {

> > >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> > 

> > -508,7

> > > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > > QSVContext

> > > *q,

> > >              return AVERROR(ENOMEM);

> > > 

> > >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> > >      }

> > > 

> > >      if (!pkt->size)

> > > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> > 

> > *avctx,

> > > QSVContext *q,

> > >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> > >                       pkt->pos);

> > > 

> > > -    avctx->field_order  = q->parser->field_order;

> > >      /* TODO: flush delayed frames on reinit */

> > > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > > -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> > > > coded_width, 16) ||

> > > 

> > > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > > > coded_height, 16)) {

> > > 

> > > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE,

> > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE };

> > > -        enum AVPixelFormat qsv_format;

> > > +

> > > +    // sw_pix_fmt was initialized as NV12, will be updated after

> > > header decoded if not true.

> > > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > > +        pix_fmts[1] = q->orig_pix_fmt;

> > 

> > Well, it still seems non self-explanatory to me why we have

> > pix_fmts[1] here.

> 

> I think it has been explained and descripted in my comment. It means

> sw_pix_fmt.

> For more detail, probably you want to check the implementation of

> ff_get_format().

> 

> > > +

> > > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> > 

> > Parsing can be expensive to perform on each frame. See more below.

> 

> 1. Parsing is just to reading header bits, I don't think it is CPU

> heavy calculation task, compared with slice level decoding. 

> 2. Mostly only key frames have completed header. For non-key frame,

> just return MFX_ERR_MORE_DATA instead of real parsing work. 



It was long ago that I looked into the details of DecodeHeader myself, but I
think the following will happen if a frame is missing a header
and you feed it into mediasdk: mediasdk will read the whole bitstream
to the end trying to find a header. Once it fails to find it,
mediasdk will return MFX_ERR_MORE_DATA. I think you will be able to see
that if you will print mfxBitstream::DataOffset and
mfxBitstream::DataLength before and after your call for
MFXVideoDECODE_DecodeHeader(). So, mediasdk actually changes DataOffset
either pointing to the beginning of the header or right after the
header (I don't remember exactly, sorry). If header not found  it sets
DataOffset=DataLength and DataLength=0 requesting new bitstream
portion. The reason why you think everything is ok is that you
construct mfxBitstream structure anew each time in qsv_decode_header.
Hence, you just don't notice that mediasdk performs a massive parsing.

I'm afraid that, as is, your change will result in massive CPU%
regressions. 

> 

> > > +

> > > +    if (ret >= 0 && (q->orig_pix_fmt !=

> > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> > > +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> > > +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

> > >          AVPacket zero_pkt = {0};

> > 

> > You are actually trying to perform the task which is being done by

> > mediasdk.

> > Moreover, you don't detect all the cases when mediasdk decoder will

> > require

> > to be reset. For example, you may have 2 streams concatenated which

> > have

> > perfectly the same resolution, but still decoder reset will be

> > required because

> > of profile/level/gop structure changes and memory reallocation or

> > better to

> > say additional allocation could also be required because mediasdk

> > may

> > require different (bigger or lesser) number of surfaces to properly

> > handle

> > new bitstream. See more below.

> 

> Which case of gop structure changing need to reset? Could you please,

> give an example.

> 

> > > 

> > >          if (q->buffered_count) {

> > > @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext

> > 

> > *avctx,

> > > QSVContext *q,

> > >              q->buffered_count--;

> > >              return qsv_decode(avctx, q, frame, got_frame,

> > 

> > &zero_pkt);

> > >          }

> > > -

> > >          q->reinit_flag = 0;

> > > 

> > > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> > > > fourcc);

> > > 

> > > -        if (qsv_format < 0) {

> > > -            av_log(avctx, AV_LOG_ERROR,

> > > -                   "Decoding pixel format '%s' is not

> > 

> > supported\n",

> > > -                   av_get_pix_fmt_name(q->parser->format));

> > > -            ret = AVERROR(ENOSYS);

> > > -            goto reinit_fail;

> > > -        }

> > > +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] =

> > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> > > 

> > > -        q->orig_pix_fmt     = q->parser->format;

> > > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > > -        avctx->width        = q->parser->width;

> > > -        avctx->height       = q->parser->height;

> > > -        avctx->coded_width  = FFALIGN(q->parser->coded_width,

> > 

> > 16);

> > > -        avctx->coded_height = FFALIGN(q->parser->coded_height,

> > 

> > 16);

> > > -        avctx->level        = q->avctx_internal->level;

> > > -        avctx->profile      = q->avctx_internal->profile;

> > > +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> > > +        avctx->coded_height = param.mfx.FrameInfo.Height;

> > > 

> > > -        ret = ff_get_format(avctx, pix_fmts);

> > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

> > >          if (ret < 0)

> > >              goto reinit_fail;

> > > +        q->initialized = 0;

> > > +    }

> > > 

> > > -        avctx->pix_fmt = ret;

> > > -

> > > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > > -        if (!desc)

> > > -            goto reinit_fail;

> > > -

> > > -         if (desc->comp[0].depth > 8) {

> > > -            avctx->coded_width

> > 

> > =  FFALIGN(q->parser->coded_width,

> > > 32);

> > > -            avctx->coded_height =

> > 

> > FFALIGN(q->parser->coded_height,

> > > 32);

> > > -        }

> > > -

> > > -        ret = qsv_decode_init(avctx, q);

> > > +    if (!q->initialized) {

> > > +        ret = qsv_decode_init(avctx, q, &param);

> > >          if (ret < 0)

> > >              goto reinit_fail;

> > > +        q->initialized = 1;

> > >      }

> > > 

> > >      return qsv_decode(avctx, q, frame, got_frame, pkt);

> > 

> > So, from my perspective you should not attempt to call decode

> > header on

> > each incoming data portion and try to interpret results. Instead

> > you should

> > respect what mediasdk DecodeFrameAsync is returning. Basically it

> > will

> > return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM is decoder reset is

> > required.

> > This gives you 2 situations when you should call

> > DecodeHeader:

> > 1. You either call it if decoder is not initalized at all 2. Or you

> > call it upon

> > receiving MFX_ERR_INCOMPATIBLE_VIDEO_PARAM

> > 

> > Once you received MFX_ERR_INCOMPATIBLE_VIDEO_PARAM step to

> > perform are:

> > 1. Flush cached frames from medisdk decoder passing nullptr

> > bitstream to

> > DecodeFrameAsync 2. Run decode header and get updated parameters 3.

> > Check (via QueryIOSurf) and potentially reallocate surfaces to

> > satisfy new

> > bitstream requirement 4. Reset decoder with new parameters

> 

> 1. This patch is just to replace original parser to MSDK parser. I

> don't like to mix other thing here (thus probably introduces new

> bugs, but full round testing has been done for this patch).

>   If it is really necessary, should be done by an additional patch. 

> 2. Unfortunately, if we don't reset and set correct

> resolution/pix_fmt, FFmpeg-qsv already failed in the progress of MSDK

> initialization (e,g: pass a NV12 format but actually it is P010. see

> : https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsv.c#L424

> )

>   There is no opportunity to get the status of DecodeFrameAsync at

> these case. I haven't compared MSDK sample decoder with current

> FFmpeg (Will take a look later). But IIRC, MSDK sample decoder isn't

> doing a better job than ffmpeg-qsv for reinit case.

>   (one example, it is hang when run sample_decode h264 -i /fate-

> suite/h264/reinit-large_420_8-to-small_420_8.h264 -r , and this clip

> is changing from a larger resolution to a smaller one. I wonder to

> know what will happen if change resolution from smaller to a larger,

> eg, 720x480 to 1920x1080).

> 3. IMHO, reinit work is better to do before decoding, instead of

> decoding it and recover it after it is failed.


In my opinion you don't actually miss the resolution change when following
the algorithm which I described. Secondly - you don't actually recover
from a decoding failure since there was no decoding failure
actually. Mediasdk parses the bitstream and if it detects that decoding can
not be completed without user intervention it signals this to you via a
warning or error. This is not a fatal error; that's just a signal: "I
can't continue as is - handle this". So, the mediasdk parser stops at the
point of the stream change. Mediasdk's internal state is the correct one; it is
not corrupted and it corresponds to the bitstream which was handled
before incompatible bitstream part was detected. You need to call
DecodeHeader with the same mfxBitstream you just used to pass to
DecodeFrameAsync. Mind that you may find mfxBitstream::DataOffset
changed after DecodeFrameAsync pointing to the beginning of the new
header.

> 4. This a common function for all decoders, I would like to see it is

> under control of ffmpeg. MSDK may have no unified implementation of

> all codecs ( e.g: https://github.com/Intel-Media-

> SDK/MediaSDK/issues/333 ).

>   Your suggestion may be addressed only when I see any new issues or

> bugs. But currently I can't see the necessity. 

> 

> > Application may attempt to optimize procedure on step 3 above and

> > avoid

> > surfaces reallocation if possible. For example, if you have

> > allocated

> > 1920x1080 surfaces and resolution changed to 720x480, you may

> > actually

> > stay on 1920x1080 if you are ok to use more memory hoping to

> > receive

> > 1920x1080 stream in the future again. Eventually this would work if

> > you have

> > enough number of 1920x1080 surfaces.

> 

> Hm, this is a simpler topic, https://patchwork.ffmpeg.org/patch/12257

> / is to prepare this though decoder should be updated too. But as

> previous discussion, thus should be a separated patch and need pre-

> testing.
Rogozhkin, Dmitry V March 11, 2019, 11:49 p.m. UTC | #4
On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:
> > -----Original Message-----

> > From: Rogozhkin, Dmitry V

> > Sent: Saturday, March 9, 2019 8:48 AM

> > To: ffmpeg-devel@ffmpeg.org

> > Cc: Li, Zhong <zhong.li@intel.com>

> > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > current

> > parser with MFXVideoDECODE_DecodeHeader()

> > 

> > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > Using MSDK parser can improve qsv decoder pass rate in some cases

> > > (E.g:

> > > sps declares a wrong level_idc, smaller than it should be).

> > > And it is necessary for adding new qsv decoders such as MJPEG and

> > > VP9

> > > since current parser can't provide enough information.

> > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html

> > > and

> > > merged as commit 1acb19d, but was overwritten when merged libav

> > > patches (commit: 1f26a23) without any explain.

> > > 

> > > v2: split decode header from decode_init, and call it for

> > > everyframe

> > > to detect format/resoultion change. It can fix some regression

> > > issues

> > > such as hevc 10bits decoding.

> > > 

> > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > ---

> > >  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++----------

> > > ----

> > > ----

> > >  libavcodec/qsvdec.h       |   2 +

> > >  libavcodec/qsvdec_h2645.c |   1 +

> > >  libavcodec/qsvdec_other.c |   1 +

> > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > 

> > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > 4a0be811fb..b78026e14d 100644

> > > --- a/libavcodec/qsvdec.c

> > > +++ b/libavcodec/qsvdec.c

> > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > qsv_fifo_size(const

> > > AVFifoBuffer* fifo)

> > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > >  }

> > > 

> > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext

> > > *q,

> > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > >  {

> > > -    const AVPixFmtDescriptor *desc;

> > >      mfxSession session = NULL;

> > >      int iopattern = 0;

> > > -    mfxVideoParam param = { 0 };

> > > -    int frame_width  = avctx->coded_width;

> > > -    int frame_height = avctx->coded_height;

> > >      int ret;

> > > 

> > > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > > -    if (!desc)

> > > -        return AVERROR_BUG;

> > > +    ret = ff_get_format(avctx, pix_fmts);

> > > +    if (ret < 0) {

> > > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > > +        return ret;

> > > +    }

> > > 

> > >      if (!q->async_fifo) {

> > >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > >          return ret;

> > >      }

> > > 

> > > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > -    if (ret < 0)

> > > -        return ret;

> > > +    param->IOPattern   = q->iopattern;

> > > +    param->AsyncDepth  = q->async_depth;

> > > +    param->ExtParam    = q->ext_buffers;

> > > +    param->NumExtParam = q->nb_ext_buffers;

> > > 

> > > -    param.mfx.CodecId      = ret;

> > > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > > >codec_id,

> > > avctx->profile);

> > > -    param.mfx.CodecLevel   = avctx->level ==

> > 

> > FF_LEVEL_UNKNOWN ?

> > > MFX_LEVEL_UNKNOWN : avctx->level;

> > > -

> > > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth >

> > 

> > 8;

> > > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > > -    param.mfx.FrameInfo.Width          = frame_width;

> > > -    param.mfx.FrameInfo.Height         = frame_height;

> > > -    param.mfx.FrameInfo.ChromaFormat   =

> > 

> > MFX_CHROMAFORMAT_YUV420;

> > > -

> > > -    switch (avctx->field_order) {

> > > -    case AV_FIELD_PROGRESSIVE:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_PROGRESSIVE;

> > > -        break;

> > > -    case AV_FIELD_TT:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_FIELD_TFF;

> > > -        break;

> > > -    case AV_FIELD_BB:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_FIELD_BFF;

> > > -        break;

> > > -    default:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_UNKNOWN;

> > > -        break;

> > > -    }

> > > +    return 0;

> > > + }

> > > 

> > > -    param.IOPattern   = q->iopattern;

> > > -    param.AsyncDepth  = q->async_depth;

> > > -    param.ExtParam    = q->ext_buffers;

> > > -    param.NumExtParam = q->nb_ext_buffers;

> > > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > > mfxVideoParam *param)

> > > +{

> > > +    int ret;

> > > 

> > > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > > +    avctx->level        = param->mfx.CodecLevel;

> > > +    avctx->profile      = param->mfx.CodecProfile;

> > > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > > > mfx.FrameInfo.PicStruct);

> > > 

> > > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > > > mfx.FrameInfo.FourCC);

> > > 

> > > +

> > > +    ret = MFXVideoDECODE_Init(q->session, param);

> > >      if (ret < 0)

> > >          return ff_qsv_print_error(avctx, ret,

> > >                                    "Error initializing the

> > 

> > MFX video

> > > decoder");

> > > 

> > > -    q->frame_info = param.mfx.FrameInfo;

> > > +    q->frame_info = param->mfx.FrameInfo;

> > > +

> > > +    return 0;

> > > +}

> > > +

> > > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext

> > > *q,

> > > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> > > *param)

> > > +{

> > > +    int ret;

> > > +

> > > +    mfxBitstream bs = { { { 0 } } };

> > > +

> > > +    if (avpkt->size) {

> > > +        bs.Data       = avpkt->data;

> > > +        bs.DataLength = avpkt->size;

> > > +        bs.MaxLength  = bs.DataLength;

> > > +        bs.TimeStamp  = avpkt->pts;

> > > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > > +            bs.DataFlag   |=

> > 

> > MFX_BITSTREAM_COMPLETE_FRAME;

> > > +    } else

> > > +        return AVERROR_INVALIDDATA;

> > > +

> > > +

> > > +    if(!q->session) {

> > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > > +        if (ret < 0)

> > > +            return ret;

> > > +    }

> > > +

> > > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > +    if (ret < 0)

> > > +        return ret;

> > > +

> > > +    param->mfx.CodecId = ret;

> > > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> > > +    if (MFX_ERR_MORE_DATA == ret) {

> > > +       return AVERROR(EAGAIN);

> > > +    }

> > > +    if (ret < 0)

> > > +        return ff_qsv_print_error(avctx, ret,

> > > +                "Error decoding stream header");

> > > 

> > >      return 0;

> > >  }

> > > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext

> > > *avctx,

> > > QSVContext *q,

> > >      uint8_t *dummy_data;

> > >      int dummy_size;

> > >      int ret;

> > > -    const AVPixFmtDescriptor *desc;

> > > +    mfxVideoParam param = { 0 };

> > > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > 

> > 

> > +                                       AV_PIX_FMT_NV

> > 12,

> > > 

> > 

> > +                                       AV_PIX_FMT_N

> > ONE };

> > > 

> > >      if (!q->avctx_internal) {

> > >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> > 

> > -508,7

> > > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > > QSVContext

> > > *q,

> > >              return AVERROR(ENOMEM);

> > > 

> > >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> > >      }

> > > 

> > >      if (!pkt->size)

> > > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> > 

> > *avctx,

> > > QSVContext *q,

> > >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> > >                       pkt->pos);

> > > 

> > > -    avctx->field_order  = q->parser->field_order;

> > >      /* TODO: flush delayed frames on reinit */

> > > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > > -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> > > > coded_width, 16) ||

> > > 

> > > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > > > coded_height, 16)) {

> > > 

> > > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE,

> > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE };

> > > -        enum AVPixelFormat qsv_format;

> > > +

> > > +    // sw_pix_fmt was initialized as NV12, will be updated after

> > > header decoded if not true.

> > > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > > +        pix_fmts[1] = q->orig_pix_fmt;

> > 

> > Well, it still seems non self-explanatory to me why we have

> > pix_fmts[1] here.

> 

> I think it has been explained and descripted in my comment. It means

> sw_pix_fmt.

> For more detail, probably you want to check the implementation of

> ff_get_format().

> 

> > > +

> > > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> > 

> > Parsing can be expensive to perform on each frame. See more below.

> 

> 1. Parsing is just to reading header bits, I don't think it is CPU

> heavy calculation task, compared with slice level decoding. 

> 2. Mostly only key frames have completed header. For non-key frame,

> just return MFX_ERR_MORE_DATA instead of real parsing work. 

> 

> > > +

> > > +    if (ret >= 0 && (q->orig_pix_fmt !=

> > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> > > +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> > > +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

> > >          AVPacket zero_pkt = {0};

> > 

> > You are actually trying to perform the task which is being done by

> > mediasdk.

> > Moreover, you don't detect all the cases when mediasdk decoder will

> > require

> > to be reset. For example, you may have 2 streams concatenated which

> > have

> > perfectly the same resolution, but still decoder reset will be

> > required because

> > of profile/level/gop structure changes and memory reallocation or

> > better to

> > say additional allocation could also be required because mediasdk

> > may

> > require different (bigger or lesser) number of surfaces to properly

> > handle

> > new bitstream. See more below.

> 

> Which case of gop structure changing need to reset? Could you please,

> give an example.

> 

> > > 

> > >          if (q->buffered_count) {

> > > @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext

> > 

> > *avctx,

> > > QSVContext *q,

> > >              q->buffered_count--;

> > >              return qsv_decode(avctx, q, frame, got_frame,

> > 

> > &zero_pkt);

> > >          }

> > > -

> > >          q->reinit_flag = 0;

> > > 

> > > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> > > > fourcc);

> > > 

> > > -        if (qsv_format < 0) {

> > > -            av_log(avctx, AV_LOG_ERROR,

> > > -                   "Decoding pixel format '%s' is not

> > 

> > supported\n",

> > > -                   av_get_pix_fmt_name(q->parser->format));

> > > -            ret = AVERROR(ENOSYS);

> > > -            goto reinit_fail;

> > > -        }

> > > +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] =

> > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> > > 

> > > -        q->orig_pix_fmt     = q->parser->format;

> > > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > > -        avctx->width        = q->parser->width;

> > > -        avctx->height       = q->parser->height;

> > > -        avctx->coded_width  = FFALIGN(q->parser->coded_width,

> > 

> > 16);

> > > -        avctx->coded_height = FFALIGN(q->parser->coded_height,

> > 

> > 16);

> > > -        avctx->level        = q->avctx_internal->level;

> > > -        avctx->profile      = q->avctx_internal->profile;

> > > +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> > > +        avctx->coded_height = param.mfx.FrameInfo.Height;

> > > 

> > > -        ret = ff_get_format(avctx, pix_fmts);

> > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

> > >          if (ret < 0)

> > >              goto reinit_fail;

> > > +        q->initialized = 0;

> > > +    }

> > > 

> > > -        avctx->pix_fmt = ret;

> > > -

> > > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > > -        if (!desc)

> > > -            goto reinit_fail;

> > > -

> > > -         if (desc->comp[0].depth > 8) {

> > > -            avctx->coded_width

> > 

> > =  FFALIGN(q->parser->coded_width,

> > > 32);

> > > -            avctx->coded_height =

> > 

> > FFALIGN(q->parser->coded_height,

> > > 32);

> > > -        }

> > > -

> > > -        ret = qsv_decode_init(avctx, q);

> > > +    if (!q->initialized) {

> > > +        ret = qsv_decode_init(avctx, q, &param);

> > >          if (ret < 0)

> > >              goto reinit_fail;

> > > +        q->initialized = 1;

> > >      }

> > > 

> > >      return qsv_decode(avctx, q, frame, got_frame, pkt);

> > 

> > So, from my perspective you should not attempt to call decode

> > header on

> > each incoming data portion and try to interpret results. Instead

> > you should

> > respect what mediasdk DecodeFrameAsync is returning. Basically it

> > will

> > return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM is decoder reset is

> > required.

> > This gives you 2 situations when you should call

> > DecodeHeader:

> > 1. You either call it if decoder is not initalized at all 2. Or you

> > call it upon

> > receiving MFX_ERR_INCOMPATIBLE_VIDEO_PARAM

> > 

> > Once you received MFX_ERR_INCOMPATIBLE_VIDEO_PARAM step to

> > perform are:

> > 1. Flush cached frames from medisdk decoder passing nullptr

> > bitstream to

> > DecodeFrameAsync 2. Run decode header and get updated parameters 3.

> > Check (via QueryIOSurf) and potentially reallocate surfaces to

> > satisfy new

> > bitstream requirement 4. Reset decoder with new parameters

> 

> 1. This patch is just to replace original parser to MSDK parser. I

> don't like to mix other thing here (thus probably introduces new

> bugs, but full round testing has been done for this patch).

>   If it is really necessary, should be done by an additional patch. 

> 2. Unfortunately, if we don't reset and set correct

> resolution/pix_fmt, FFmpeg-qsv already failed in the progress of MSDK

> initialization (e,g: pass a NV12 format but actually it is P010. see

> : https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsv.c#L424

> )

>   There is no opportunity to get the status of DecodeFrameAsync at

> these case. I haven't compared MSDK sample decoder with current

> FFmpeg (Will take a look later). But IIRC, MSDK sample decoder isn't

> doing a better job than ffmpeg-qsv for reinit case.

>   (one example: it hangs when running sample_decode h264 -i /fate-

> suite/h264/reinit-large_420_8-to-small_420_8.h264 -r , and this clip

> changes from a larger resolution to a smaller one.


If you see the bug, file the bug. As far as I know, sample_decode
supports resolution change.

> I wonder what will happen if the resolution changes from smaller

> to larger, e.g., 720x480 to 1920x1080).

> 3. IMHO, reinit work is better to do before decoding, instead of

> decoding it and recover it after it is failed. 

> 4. This a common function for all decoders, I would like to see it is

> under control of ffmpeg. MSDK may have no unified implementation of

> all codecs ( e.g: https://github.com/Intel-Media-SDK/MediaSDK/issues/

> 333 ).

>   Your suggestion may be addressed only when I see any new issues or

> bugs. But currently I can't see the necessity. 

> 

> > Application may attempt to optimize procedure on step 3 above and

> > avoid

> > surfaces reallocation if possible. For example, if you have

> > allocated

> > 1920x1080 surfaces and resolution changed to 720x480, you may

> > actually

> > stay on 1920x1080 if you are ok to use more memory hoping to

> > receive

> > 1920x1080 stream in the future again. Eventually this would work if

> > you have

> > enough number of 1920x1080 surfaces.

> 

> Hm, this is a simpler topic, https://patchwork.ffmpeg.org/patch/12257

> / is to prepare this though decoder should be updated too. But as

> previous discussion, thus should be a separated patch and need pre-

> testing.
Rogozhkin, Dmitry V March 12, 2019, 12:04 a.m. UTC | #5
pix_fmts[1] is misleading, and I think it can quite easily be
avoided.

If I understand the code correctly, qsv_decode_preinit is the place
(and the only place) where you need an array of pix_fmts. You actually
request ffmpeg to select one of the formats and program it into avctx.
Is that right?

If so, could we try to define the array exactly in this function and
avoid passing the whole array around? See comments below for
suggestions...


On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:
> > -----Original Message-----

> > From: Rogozhkin, Dmitry V

> > Sent: Saturday, March 9, 2019 8:48 AM

> > To: ffmpeg-devel@ffmpeg.org

> > Cc: Li, Zhong <zhong.li@intel.com>

> > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > current

> > parser with MFXVideoDECODE_DecodeHeader()

> > 

> > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > Using MSDK parser can improve qsv decoder pass rate in some cases

> > > (E.g:

> > > sps declares a wrong level_idc, smaller than it should be).

> > > And it is necessary for adding new qsv decoders such as MJPEG and

> > > VP9

> > > since current parser can't provide enough information.

> > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html

> > > and

> > > merged as commit 1acb19d, but was overwritten when merged libav

> > > patches (commit: 1f26a23) without any explain.

> > > 

> > > v2: split decode header from decode_init, and call it for

> > > everyframe

> > > to detect format/resoultion change. It can fix some regression

> > > issues

> > > such as hevc 10bits decoding.

> > > 

> > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > ---

> > >  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++----------

> > > ----

> > > ----

> > >  libavcodec/qsvdec.h       |   2 +

> > >  libavcodec/qsvdec_h2645.c |   1 +

> > >  libavcodec/qsvdec_other.c |   1 +

> > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > 

> > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > 4a0be811fb..b78026e14d 100644

> > > --- a/libavcodec/qsvdec.c

> > > +++ b/libavcodec/qsvdec.c

> > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > qsv_fifo_size(const

> > > AVFifoBuffer* fifo)

> > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > >  }

> > > 

> > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext

> > > *q,

> > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)


Just: enum AVPixelFormat pix_fmt. A single one is all we need.

> > >  {

> > > -    const AVPixFmtDescriptor *desc;

> > >      mfxSession session = NULL;

> > >      int iopattern = 0;

> > > -    mfxVideoParam param = { 0 };

> > > -    int frame_width  = avctx->coded_width;

> > > -    int frame_height = avctx->coded_height;

> > >      int ret;

> > > 

> > > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > > -    if (!desc)

> > > -        return AVERROR_BUG;


enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
                                   pix_fmt,  // <<< use the one we got
in function call
                                   AV_PIX_FMT_NONE };

> > > +    ret = ff_get_format(avctx, pix_fmts);

> > > +    if (ret < 0) {

> > > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > > +        return ret;

> > > +    }

> > > 

> > >      if (!q->async_fifo) {

> > >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > >          return ret;

> > >      }

> > > 

> > > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > -    if (ret < 0)

> > > -        return ret;

> > > +    param->IOPattern   = q->iopattern;

> > > +    param->AsyncDepth  = q->async_depth;

> > > +    param->ExtParam    = q->ext_buffers;

> > > +    param->NumExtParam = q->nb_ext_buffers;

> > > 

> > > -    param.mfx.CodecId      = ret;

> > > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > > >codec_id,

> > > avctx->profile);

> > > -    param.mfx.CodecLevel   = avctx->level ==

> > 

> > FF_LEVEL_UNKNOWN ?

> > > MFX_LEVEL_UNKNOWN : avctx->level;

> > > -

> > > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth >

> > 

> > 8;

> > > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > > -    param.mfx.FrameInfo.Width          = frame_width;

> > > -    param.mfx.FrameInfo.Height         = frame_height;

> > > -    param.mfx.FrameInfo.ChromaFormat   =

> > 

> > MFX_CHROMAFORMAT_YUV420;

> > > -

> > > -    switch (avctx->field_order) {

> > > -    case AV_FIELD_PROGRESSIVE:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_PROGRESSIVE;

> > > -        break;

> > > -    case AV_FIELD_TT:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_FIELD_TFF;

> > > -        break;

> > > -    case AV_FIELD_BB:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_FIELD_BFF;

> > > -        break;

> > > -    default:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_UNKNOWN;

> > > -        break;

> > > -    }

> > > +    return 0;

> > > + }

> > > 

> > > -    param.IOPattern   = q->iopattern;

> > > -    param.AsyncDepth  = q->async_depth;

> > > -    param.ExtParam    = q->ext_buffers;

> > > -    param.NumExtParam = q->nb_ext_buffers;

> > > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > > mfxVideoParam *param)

> > > +{

> > > +    int ret;

> > > 

> > > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > > +    avctx->level        = param->mfx.CodecLevel;

> > > +    avctx->profile      = param->mfx.CodecProfile;

> > > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > > > mfx.FrameInfo.PicStruct);

> > > 

> > > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > > > mfx.FrameInfo.FourCC);

> > > 

> > > +

> > > +    ret = MFXVideoDECODE_Init(q->session, param);

> > >      if (ret < 0)

> > >          return ff_qsv_print_error(avctx, ret,

> > >                                    "Error initializing the

> > 

> > MFX video

> > > decoder");

> > > 

> > > -    q->frame_info = param.mfx.FrameInfo;

> > > +    q->frame_info = param->mfx.FrameInfo;

> > > +

> > > +    return 0;

> > > +}

> > > +

> > > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext

> > > *q,

> > > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> > > *param)


Just: enum AVPixelFormat pix_fmt. A single one is all we need.

> > > +{

> > > +    int ret;

> > > +

> > > +    mfxBitstream bs = { { { 0 } } };

> > > +

> > > +    if (avpkt->size) {

> > > +        bs.Data       = avpkt->data;

> > > +        bs.DataLength = avpkt->size;

> > > +        bs.MaxLength  = bs.DataLength;

> > > +        bs.TimeStamp  = avpkt->pts;

> > > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > > +            bs.DataFlag   |=

> > 

> > MFX_BITSTREAM_COMPLETE_FRAME;

> > > +    } else

> > > +        return AVERROR_INVALIDDATA;

> > > +

> > > +

> > > +    if(!q->session) {

> > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > > +        if (ret < 0)

> > > +            return ret;

> > > +    }

> > > +

> > > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > +    if (ret < 0)

> > > +        return ret;

> > > +

> > > +    param->mfx.CodecId = ret;

> > > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> > > +    if (MFX_ERR_MORE_DATA == ret) {

> > > +       return AVERROR(EAGAIN);

> > > +    }

> > > +    if (ret < 0)

> > > +        return ff_qsv_print_error(avctx, ret,

> > > +                "Error decoding stream header");

> > > 

> > >      return 0;

> > >  }

> > > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext

> > > *avctx,

> > > QSVContext *q,

> > >      uint8_t *dummy_data;

> > >      int dummy_size;

> > >      int ret;

> > > -    const AVPixFmtDescriptor *desc;

> > > +    mfxVideoParam param = { 0 };

> > > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > 

> > 

> > +                                       AV_PIX_FMT_NV

> > 12,

> > > 

> > 

> > +                                       AV_PIX_FMT_N

> > ONE };


Just enum AVPixelFormat pix_fmt = AV_PIX_FMT_NV12.

> > > 

> > >      if (!q->avctx_internal) {

> > >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> > 

> > -508,7

> > > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > > QSVContext

> > > *q,

> > >              return AVERROR(ENOMEM);

> > > 

> > >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> > >      }

> > > 

> > >      if (!pkt->size)

> > > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> > 

> > *avctx,

> > > QSVContext *q,

> > >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> > >                       pkt->pos);

> > > 

> > > -    avctx->field_order  = q->parser->field_order;

> > >      /* TODO: flush delayed frames on reinit */

> > > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > > -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> > > > coded_width, 16) ||

> > > 

> > > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > > > coded_height, 16)) {

> > > 

> > > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE,

> > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE };

> > > -        enum AVPixelFormat qsv_format;

> > > +

> > > +    // sw_pix_fmt was initialized as NV12, will be updated after

> > > header decoded if not true.

> > > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > > +        pix_fmts[1] = q->orig_pix_fmt;

> > 

> > Well, it still seems non self-explanatory to me why we have

> > pix_fmts[1] here.

> 

> I think it has been explained and descripted in my comment. It means

> sw_pix_fmt.

> For more detail, probably you want to check the implementation of

> ff_get_format().

> 

> > > +

> > > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> > 

> > Parsing can be expensive to perform on each frame. See more below.

> 

> 1. Parsing is just to reading header bits, I don't think it is CPU

> heavy calculation task, compared with slice level decoding. 

> 2. Mostly only key frames have completed header. For non-key frame,

> just return MFX_ERR_MORE_DATA instead of real parsing work. 

> 

> > > +

> > > +    if (ret >= 0 && (q->orig_pix_fmt !=

> > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> > > +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> > > +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

> > >          AVPacket zero_pkt = {0};

> > 

> > You are actually trying to perform the task which is being done by

> > mediasdk.

> > Moreover, you don't detect all the cases when mediasdk decoder will

> > require

> > to be reset. For example, you may have 2 streams concatenated which

> > have

> > perfectly the same resolution, but still decoder reset will be

> > required because

> > of profile/level/gop structure changes and memory reallocation or

> > better to

> > say additional allocation could also be required because mediasdk

> > may

> > require different (bigger or lesser) number of surfaces to properly

> > handle

> > new bitstream. See more below.

> 

> Which case of gop structure changing need to reset? Could you please,

> give an example.

> 

> > > 

> > >          if (q->buffered_count) {

> > > @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext

> > 

> > *avctx,

> > > QSVContext *q,

> > >              q->buffered_count--;

> > >              return qsv_decode(avctx, q, frame, got_frame,

> > 

> > &zero_pkt);

> > >          }

> > > -

> > >          q->reinit_flag = 0;

> > > 

> > > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> > > > fourcc);

> > > 

> > > -        if (qsv_format < 0) {

> > > -            av_log(avctx, AV_LOG_ERROR,

> > > -                   "Decoding pixel format '%s' is not

> > 

> > supported\n",

> > > -                   av_get_pix_fmt_name(q->parser->format));

> > > -            ret = AVERROR(ENOSYS);

> > > -            goto reinit_fail;

> > > -        }

> > > +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] =

> > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> > > 

> > > -        q->orig_pix_fmt     = q->parser->format;

> > > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > > -        avctx->width        = q->parser->width;

> > > -        avctx->height       = q->parser->height;

> > > -        avctx->coded_width  = FFALIGN(q->parser->coded_width,

> > 

> > 16);

> > > -        avctx->coded_height = FFALIGN(q->parser->coded_height,

> > 

> > 16);

> > > -        avctx->level        = q->avctx_internal->level;

> > > -        avctx->profile      = q->avctx_internal->profile;

> > > +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> > > +        avctx->coded_height = param.mfx.FrameInfo.Height;

> > > 

> > > -        ret = ff_get_format(avctx, pix_fmts);

> > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

> > >          if (ret < 0)

> > >              goto reinit_fail;

> > > +        q->initialized = 0;

> > > +    }

> > > 

> > > -        avctx->pix_fmt = ret;

> > > -

> > > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > > -        if (!desc)

> > > -            goto reinit_fail;

> > > -

> > > -         if (desc->comp[0].depth > 8) {

> > > -            avctx->coded_width

> > 

> > =  FFALIGN(q->parser->coded_width,

> > > 32);

> > > -            avctx->coded_height =

> > 

> > FFALIGN(q->parser->coded_height,

> > > 32);

> > > -        }

> > > -

> > > -        ret = qsv_decode_init(avctx, q);

> > > +    if (!q->initialized) {

> > > +        ret = qsv_decode_init(avctx, q, &param);

> > >          if (ret < 0)

> > >              goto reinit_fail;

> > > +        q->initialized = 1;

> > >      }

> > > 

> > >      return qsv_decode(avctx, q, frame, got_frame, pkt);

> > 

> > So, from my perspective you should not attempt to call decode

> > header on

> > each incoming data portion and try to interpret results. Instead

> > you should

> > respect what mediasdk DecodeFrameAsync is returning. Basically it

> > will

> > return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM is decoder reset is

> > required.

> > This gives you 2 situations when you should call

> > DecodeHeader:

> > 1. You either call it if decoder is not initalized at all 2. Or you

> > call it upon

> > receiving MFX_ERR_INCOMPATIBLE_VIDEO_PARAM

> > 

> > Once you received MFX_ERR_INCOMPATIBLE_VIDEO_PARAM step to

> > perform are:

> > 1. Flush cached frames from medisdk decoder passing nullptr

> > bitstream to

> > DecodeFrameAsync 2. Run decode header and get updated parameters 3.

> > Check (via QueryIOSurf) and potentially reallocate surfaces to

> > satisfy new

> > bitstream requirement 4. Reset decoder with new parameters

> 

> 1. This patch is just to replace original parser to MSDK parser. I

> don't like to mix other thing here (thus probably introduces new

> bugs, but full round testing has been done for this patch).

>   If it is really necessary, should be done by an additional patch. 

> 2. Unfortunately, if we don't reset and set correct

> resolution/pix_fmt, FFmpeg-qsv already failed in the progress of MSDK

> initialization (e,g: pass a NV12 format but actually it is P010. see

> : https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsv.c#L424

> )

>   There is no opportunity to get the status of DecodeFrameAsync at

> these case. I haven't compared MSDK sample decoder with current

> FFmpeg (Will take a look later). But IIRC, MSDK sample decoder isn't

> doing a better job than ffmpeg-qsv for reinit case.

>   (one example, it is hang when run sample_decode h264 -i /fate-

> suite/h264/reinit-large_420_8-to-small_420_8.h264 -r , and this clip

> is changing from a larger resolution to a smaller one. I wonder to

> know what will happen if change resolution from smaller to a larger,

> eg, 720x480 to 1920x1080).

> 3. IMHO, reinit work is better to do before decoding, instead of

> decoding it and recover it after it is failed. 

> 4. This a common function for all decoders, I would like to see it is

> under control of ffmpeg. MSDK may have no unified implementation of

> all codecs ( e.g: https://github.com/Intel-Media-SDK/MediaSDK/issues/

> 333 ).

>   Your suggestion may be addressed only when I see any new issues or

> bugs. But currently I can't see the necessity. 

> 

> > Application may attempt to optimize procedure on step 3 above and

> > avoid

> > surfaces reallocation if possible. For example, if you have

> > allocated

> > 1920x1080 surfaces and resolution changed to 720x480, you may

> > actually

> > stay on 1920x1080 if you are ok to use more memory hoping to

> > receive

> > 1920x1080 stream in the future again. Eventually this would work if

> > you have

> > enough number of 1920x1080 surfaces.

> 

> Hm, this is a simpler topic, https://patchwork.ffmpeg.org/patch/12257

> / is to prepare this though decoder should be updated too. But as

> previous discussion, thus should be a separated patch and need pre-

> testing.
Zhong Li March 14, 2019, 11:03 a.m. UTC | #6
> From: Rogozhkin, Dmitry V

> Sent: Tuesday, March 12, 2019 7:37 AM

> To: Li, Zhong <zhong.li@intel.com>; ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace current

> parser with MFXVideoDECODE_DecodeHeader()

> 

> On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:

> > > -----Original Message-----

> > > From: Rogozhkin, Dmitry V

> > > Sent: Saturday, March 9, 2019 8:48 AM

> > > To: ffmpeg-devel@ffmpeg.org

> > > Cc: Li, Zhong <zhong.li@intel.com>

> > > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > > current parser with MFXVideoDECODE_DecodeHeader()

> > >

> > > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > > Using MSDK parser can improve qsv decoder pass rate in some cases

> > > > (E.g:

> > > > sps declares a wrong level_idc, smaller than it should be).

> > > > And it is necessary for adding new qsv decoders such as MJPEG and

> > > > VP9

> > > > since current parser can't provide enough information.

> > > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html

> > > > and

> > > > merged as commit 1acb19d, but was overwritten when merged libav

> > > > patches (commit: 1f26a23) without any explain.

> > > >

> > > > v2: split decode header from decode_init, and call it for

> > > > everyframe to detect format/resoultion change. It can fix some

> > > > regression issues such as hevc 10bits decoding.

> > > >

> > > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > > ---

> > > >  libavcodec/qsvdec.c       | 172

> ++++++++++++++++++++----------

> > > > ----

> > > > ----

> > > >  libavcodec/qsvdec.h       |   2 +

> > > >  libavcodec/qsvdec_h2645.c |   1 +

> > > >  libavcodec/qsvdec_other.c |   1 +

> > > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > >

> > > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > > 4a0be811fb..b78026e14d 100644

> > > > --- a/libavcodec/qsvdec.c

> > > > +++ b/libavcodec/qsvdec.c

> > > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > > qsv_fifo_size(const

> > > > AVFifoBuffer* fifo)

> > > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > > >  }

> > > >

> > > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext

> > > > *q,

> > > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > > >  {

> > > > -    const AVPixFmtDescriptor *desc;

> > > >      mfxSession session = NULL;

> > > >      int iopattern = 0;

> > > > -    mfxVideoParam param = { 0 };

> > > > -    int frame_width  = avctx->coded_width;

> > > > -    int frame_height = avctx->coded_height;

> > > >      int ret;

> > > >

> > > > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > > > -    if (!desc)

> > > > -        return AVERROR_BUG;

> > > > +    ret = ff_get_format(avctx, pix_fmts);

> > > > +    if (ret < 0) {

> > > > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > > > +        return ret;

> > > > +    }

> > > >

> > > >      if (!q->async_fifo) {

> > > >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > > > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > > > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > >          return ret;

> > > >      }

> > > >

> > > > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > > -    if (ret < 0)

> > > > -        return ret;

> > > > +    param->IOPattern   = q->iopattern;

> > > > +    param->AsyncDepth  = q->async_depth;

> > > > +    param->ExtParam    = q->ext_buffers;

> > > > +    param->NumExtParam = q->nb_ext_buffers;

> > > >

> > > > -    param.mfx.CodecId      = ret;

> > > > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > > > >codec_id,

> > > > avctx->profile);

> > > > -    param.mfx.CodecLevel   = avctx->level ==

> > >

> > > FF_LEVEL_UNKNOWN ?

> > > > MFX_LEVEL_UNKNOWN : avctx->level;

> > > > -

> > > > -    param.mfx.FrameInfo.BitDepthLuma   =

> desc->comp[0].depth;

> > > > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > > > -    param.mfx.FrameInfo.Shift          =

> desc->comp[0].depth >

> > >

> > > 8;

> > > > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > > > -    param.mfx.FrameInfo.Width          = frame_width;

> > > > -    param.mfx.FrameInfo.Height         = frame_height;

> > > > -    param.mfx.FrameInfo.ChromaFormat   =

> > >

> > > MFX_CHROMAFORMAT_YUV420;

> > > > -

> > > > -    switch (avctx->field_order) {

> > > > -    case AV_FIELD_PROGRESSIVE:

> > > > -        param.mfx.FrameInfo.PicStruct =

> > >

> > > MFX_PICSTRUCT_PROGRESSIVE;

> > > > -        break;

> > > > -    case AV_FIELD_TT:

> > > > -        param.mfx.FrameInfo.PicStruct =

> > >

> > > MFX_PICSTRUCT_FIELD_TFF;

> > > > -        break;

> > > > -    case AV_FIELD_BB:

> > > > -        param.mfx.FrameInfo.PicStruct =

> > >

> > > MFX_PICSTRUCT_FIELD_BFF;

> > > > -        break;

> > > > -    default:

> > > > -        param.mfx.FrameInfo.PicStruct =

> > >

> > > MFX_PICSTRUCT_UNKNOWN;

> > > > -        break;

> > > > -    }

> > > > +    return 0;

> > > > + }

> > > >

> > > > -    param.IOPattern   = q->iopattern;

> > > > -    param.AsyncDepth  = q->async_depth;

> > > > -    param.ExtParam    = q->ext_buffers;

> > > > -    param.NumExtParam = q->nb_ext_buffers;

> > > > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > > > mfxVideoParam *param)

> > > > +{

> > > > +    int ret;

> > > >

> > > > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > > > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > > > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > > > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > > > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > > > +    avctx->level        = param->mfx.CodecLevel;

> > > > +    avctx->profile      = param->mfx.CodecProfile;

> > > > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > > > > mfx.FrameInfo.PicStruct);

> > > >

> > > > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > > > > mfx.FrameInfo.FourCC);

> > > >

> > > > +

> > > > +    ret = MFXVideoDECODE_Init(q->session, param);

> > > >      if (ret < 0)

> > > >          return ff_qsv_print_error(avctx, ret,

> > > >                                    "Error initializing

> the

> > >

> > > MFX video

> > > > decoder");

> > > >

> > > > -    q->frame_info = param.mfx.FrameInfo;

> > > > +    q->frame_info = param->mfx.FrameInfo;

> > > > +

> > > > +    return 0;

> > > > +}

> > > > +

> > > > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext

> > > > *q,

> > > > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> > > > *param)

> > > > +{

> > > > +    int ret;

> > > > +

> > > > +    mfxBitstream bs = { { { 0 } } };

> > > > +

> > > > +    if (avpkt->size) {

> > > > +        bs.Data       = avpkt->data;

> > > > +        bs.DataLength = avpkt->size;

> > > > +        bs.MaxLength  = bs.DataLength;

> > > > +        bs.TimeStamp  = avpkt->pts;

> > > > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > > > +            bs.DataFlag   |=

> > >

> > > MFX_BITSTREAM_COMPLETE_FRAME;

> > > > +    } else

> > > > +        return AVERROR_INVALIDDATA;

> > > > +

> > > > +

> > > > +    if(!q->session) {

> > > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > > > +        if (ret < 0)

> > > > +            return ret;

> > > > +    }

> > > > +

> > > > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > > +    if (ret < 0)

> > > > +        return ret;

> > > > +

> > > > +    param->mfx.CodecId = ret;

> > > > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs,

> param);

> > > > +    if (MFX_ERR_MORE_DATA == ret) {

> > > > +       return AVERROR(EAGAIN);

> > > > +    }

> > > > +    if (ret < 0)

> > > > +        return ff_qsv_print_error(avctx, ret,

> > > > +                "Error decoding stream header");

> > > >

> > > >      return 0;

> > > >  }

> > > > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext

> > > > *avctx,

> > > > QSVContext *q,

> > > >      uint8_t *dummy_data;

> > > >      int dummy_size;

> > > >      int ret;

> > > > -    const AVPixFmtDescriptor *desc;

> > > > +    mfxVideoParam param = { 0 };

> > > > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > >

> > >

> > >

> +                                       AV_PIX_FMT_NV

> > > 12,

> > > >

> > >

> > >

> +                                       AV_PIX_FMT_N

> > > ONE };

> > > >

> > > >      if (!q->avctx_internal) {

> > > >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> > >

> > > -508,7

> > > > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > > > QSVContext

> > > > *q,

> > > >              return AVERROR(ENOMEM);

> > > >

> > > >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > > > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> > > >      }

> > > >

> > > >      if (!pkt->size)

> > > > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> > >

> > > *avctx,

> > > > QSVContext *q,

> > > >                       pkt->data, pkt->size, pkt->pts,

> pkt->dts,

> > > >                       pkt->pos);

> > > >

> > > > -    avctx->field_order  = q->parser->field_order;

> > > >      /* TODO: flush delayed frames on reinit */

> > > > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > > > -        FFALIGN(q->parser->coded_width, 16)  !=

> FFALIGN(avctx-

> > > > > coded_width, 16) ||

> > > >

> > > > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > > > > coded_height, 16)) {

> > > >

> > > > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > >

> > >

> > >

> -                                           AV_PIX_F

> MT_NONE,

> > > >

> > >

> > >

> -                                           AV_PIX_F

> MT_NONE };

> > > > -        enum AVPixelFormat qsv_format;

> > > > +

> > > > +    // sw_pix_fmt was initialized as NV12, will be updated after

> > > > header decoded if not true.

> > > > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > > > +        pix_fmts[1] = q->orig_pix_fmt;

> > >

> > > Well, it still seems non self-explanatory to me why we have

> > > pix_fmts[1] here.

> >

> > I think it has been explained and descripted in my comment. It means

> > sw_pix_fmt.

> > For more detail, probably you want to check the implementation of

> > ff_get_format().

> >

> > > > +

> > > > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> > >

> > > Parsing can be expensive to perform on each frame. See more below.

> >

> > 1. Parsing is just to reading header bits, I don't think it is CPU

> > heavy calculation task, compared with slice level decoding.

> > 2. Mostly only key frames have completed header. For non-key frame,

> > just return MFX_ERR_MORE_DATA instead of real parsing work.

> 

> 

> That was long when I looked myself in details of DecodeHeader, but I think

> that what the following will happen if frame is missing a header and you feed

> it into mediasdk: mediasdk will read the whole bitstream to the end trying to

> find a header. Once it will fail to find it mediasdk will return

> MFX_ERR_MORE_DATA. I think you will be able to see that if you will print

> mfxBitstream::DataOffset and mfxBitstream::DataLength before and after

> your call for MFXVideoDECODE_DecodeHeader(). So, mediasdk actually

> changes DataOffset either pointing to the beginning of the header or right

> after the header (I don't remember exactly, sorry). If header not found  it

> sets DataOffset=DataLength and DataLength=0 requesting new bitstream

> portion. The reason why you think everything is ok is that you construct

> mfxBitstream structure anew each time in qsv_decode_header.

> Hence, you just don't notice that mediasdk performs a massive parsing.

> 

> I afraid that as is your change will result in massive CPU% regressions.


That is a good point; if a massive CPU% regression really happens, it should definitely be taken seriously.
Then I compared the performance and CPU usage with and without this patch. I can't find an obvious performance drop or higher CPU usage, at least for H.264.
And I took a deeper look at how MSDK searches for the start code: https://github.com/Intel-Media-SDK/MediaSDK/blob/master/_studio/shared/umc/codec/h264_dec/src/umc_h264_nal_spl.cpp#L319
If MSDK can find a start code, no matter whether it is an SPS/PPS or a slice header start code, it will end the search.
Yes, for most non-key frames there is no SPS/PPS. But the slice header still exists, which means MSDK doesn't need to search the whole packet.
For the Annex C case, ffmpeg-qsv will convert it to Annex B: https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsvdec_h2645.c#L257, which makes sure a start code always exists.

If you agree with the above, I would like to keep the current implementation and push this patch; otherwise it will block the MJPEG/VP9 decoding patches.
Zhong Li March 14, 2019, 11:16 a.m. UTC | #7
> From: Rogozhkin, Dmitry V

> Sent: Tuesday, March 12, 2019 8:04 AM

> To: Li, Zhong <zhong.li@intel.com>; ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace current

> parser with MFXVideoDECODE_DecodeHeader()

> 

> pix_fmts[1] is misleading. And I think it can be quite easily avoided.

> 

> If I understand the code correctly, qsv_decode_preinit is the place (and the

> only place) where you need an array of pix_fmts. You actually request

> ffmpeg to select one of the formats and program it into avctx.

> Is that right?

> 

> If so, could we try to define the array exactly in this function and avoid

> passing the whole array around? See comments below for suggestions...

> 

> 

> On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:

> > > -----Original Message-----

> > > From: Rogozhkin, Dmitry V

> > > Sent: Saturday, March 9, 2019 8:48 AM

> > > To: ffmpeg-devel@ffmpeg.org

> > > Cc: Li, Zhong <zhong.li@intel.com>

> > > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > > current parser with MFXVideoDECODE_DecodeHeader()

> > >

> > > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > > Using MSDK parser can improve qsv decoder pass rate in some cases

> > > > (E.g:

> > > > sps declares a wrong level_idc, smaller than it should be).

> > > > And it is necessary for adding new qsv decoders such as MJPEG and

> > > > VP9

> > > > since current parser can't provide enough information.

> > > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html

> > > > and

> > > > merged as commit 1acb19d, but was overwritten when merged libav

> > > > patches (commit: 1f26a23) without any explain.

> > > >

> > > > v2: split decode header from decode_init, and call it for

> > > > everyframe to detect format/resoultion change. It can fix some

> > > > regression issues such as hevc 10bits decoding.

> > > >

> > > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > > ---

> > > >  libavcodec/qsvdec.c       | 172

> ++++++++++++++++++++----------

> > > > ----

> > > > ----

> > > >  libavcodec/qsvdec.h       |   2 +

> > > >  libavcodec/qsvdec_h2645.c |   1 +

> > > >  libavcodec/qsvdec_other.c |   1 +

> > > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > >

> > > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > > 4a0be811fb..b78026e14d 100644

> > > > --- a/libavcodec/qsvdec.c

> > > > +++ b/libavcodec/qsvdec.c

> > > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > > qsv_fifo_size(const

> > > > AVFifoBuffer* fifo)

> > > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > > >  }

> > > >

> > > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext

> > > > *q,

> > > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> 

> Just: enum AVPixelFormat pix_fmt. Single one we need.


No, it must be an array, and the last element of this array must be AV_PIX_FMT_NONE (otherwise the function ff_get_format() doesn't know how many elements the array contains).
The first element of pix_fmt[] is the HW pixel format, the second element of pix_fmt[] is the SW pixel format.
Once again, I would like you to take a look at the implementation details of ff_get_format().
Rogozhkin, Dmitry V March 15, 2019, 8:22 p.m. UTC | #8
On Thu, 2019-03-14 at 19:16 +0800, Li, Zhong wrote:
> > From: Rogozhkin, Dmitry V

> > Sent: Tuesday, March 12, 2019 8:04 AM

> > To: Li, Zhong <zhong.li@intel.com>; ffmpeg-devel@ffmpeg.org

> > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > current

> > parser with MFXVideoDECODE_DecodeHeader()

> > 

> > pix_fmts[1] is misleading. And I think it can be quite easily

> > avoided.

> > 

> > If I understand the code correctly, qsv_decode_preinit is the place

> > (and the

> > only place) where you need an array of pix_fmts. You actually

> > request

> > ffmpeg to select one of the formats and program it into avctx.

> > Is that right?

> > 

> > If so, could we try to define the array exactly in this function

> > and avoid

> > passing the whole array around? See comments below for

> > suggestions...

> > 

> > 

> > On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:

> > > > -----Original Message-----

> > > > From: Rogozhkin, Dmitry V

> > > > Sent: Saturday, March 9, 2019 8:48 AM

> > > > To: ffmpeg-devel@ffmpeg.org

> > > > Cc: Li, Zhong <zhong.li@intel.com>

> > > > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > > > current parser with MFXVideoDECODE_DecodeHeader()

> > > > 

> > > > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > > > Using MSDK parser can improve qsv decoder pass rate in some

> > > > > cases

> > > > > (E.g:

> > > > > sps declares a wrong level_idc, smaller than it should be).

> > > > > And it is necessary for adding new qsv decoders such as MJPEG

> > > > > and

> > > > > VP9

> > > > > since current parser can't provide enough information.

> > > > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed

> > > > > at

> > > > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.ht

> > > > > ml

> > > > > and

> > > > > merged as commit 1acb19d, but was overwritten when merged

> > > > > libav

> > > > > patches (commit: 1f26a23) without any explain.

> > > > > 

> > > > > v2: split decode header from decode_init, and call it for

> > > > > everyframe to detect format/resoultion change. It can fix

> > > > > some

> > > > > regression issues such as hevc 10bits decoding.

> > > > > 

> > > > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > > > ---

> > > > >  libavcodec/qsvdec.c       | 172

> > 

> > ++++++++++++++++++++----------

> > > > > ----

> > > > > ----

> > > > >  libavcodec/qsvdec.h       |   2 +

> > > > >  libavcodec/qsvdec_h2645.c |   1 +

> > > > >  libavcodec/qsvdec_other.c |   1 +

> > > > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > > > 

> > > > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > > > 4a0be811fb..b78026e14d 100644

> > > > > --- a/libavcodec/qsvdec.c

> > > > > +++ b/libavcodec/qsvdec.c

> > > > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > > > qsv_fifo_size(const

> > > > > AVFifoBuffer* fifo)

> > > > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > > > >  }

> > > > > 

> > > > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext

> > > > > *q)

> > > > > +static int qsv_decode_preinit(AVCodecContext *avctx,

> > > > > QSVContext

> > > > > *q,

> > > > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > 

> > Just: enum AVPixelFormat pix_fmt. Single one we need.

> 

> No, it must be array, and the last element of this array must be

> AV_PIX_FMT_NONE (else function ff_get_format() doesn't know how many

> elements contained by this array). 

> The first element of pix_fmt[] is HW pixel format, the second element

> of pix_fmt[] is SW pixel format. 

> Once again, I would like you to take a look the implementation detail

> of ff_get_format ().

> 


I understand that, but what you have in your patch is an overcomplicated
design. You pass the whole array around from one function to another
while you only use a single element from this array, i.e. pix_fmt[1] -
you need to pass only that element and construct the array when you
actually use it. The original code had this closer together and was OK.
But in your refactoring you step into this problem.

Here is the patch which I think you need to amend into this change: https://github.com/lizhong1008/ffmpeg-1/pull/1
Rogozhkin, Dmitry V March 16, 2019, 1:16 a.m. UTC | #9
On Thu, 2019-03-14 at 19:03 +0800, Li, Zhong wrote:
> > From: Rogozhkin, Dmitry V

> > Sent: Tuesday, March 12, 2019 7:37 AM

> > To: Li, Zhong <zhong.li@intel.com>; ffmpeg-devel@ffmpeg.org

> > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > current

> > parser with MFXVideoDECODE_DecodeHeader()

> > 

> > On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:

> > > > -----Original Message-----

> > > > From: Rogozhkin, Dmitry V

> > > > Sent: Saturday, March 9, 2019 8:48 AM

> > > > To: ffmpeg-devel@ffmpeg.org

> > > > Cc: Li, Zhong <zhong.li@intel.com>

> > > > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > > > current parser with MFXVideoDECODE_DecodeHeader()

> > > > 

> > > > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > > > Using MSDK parser can improve qsv decoder pass rate in some

> > > > > cases

> > > > > (E.g:

> > > > > sps declares a wrong level_idc, smaller than it should be).

> > > > > And it is necessary for adding new qsv decoders such as MJPEG

> > > > > and

> > > > > VP9

> > > > > since current parser can't provide enough information.

> > > > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed

> > > > > at

> > > > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.ht

> > > > > ml

> > > > > and

> > > > > merged as commit 1acb19d, but was overwritten when merged

> > > > > libav

> > > > > patches (commit: 1f26a23) without any explain.

> > > > > 

> > > > > v2: split decode header from decode_init, and call it for

> > > > > everyframe to detect format/resoultion change. It can fix

> > > > > some

> > > > > regression issues such as hevc 10bits decoding.

> > > > > 

> > > > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > > > ---

> > > > >  libavcodec/qsvdec.c       | 172

> > 

> > ++++++++++++++++++++----------

> > > > > ----

> > > > > ----

> > > > >  libavcodec/qsvdec.h       |   2 +

> > > > >  libavcodec/qsvdec_h2645.c |   1 +

> > > > >  libavcodec/qsvdec_other.c |   1 +

> > > > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > > > 

> > > > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > > > 4a0be811fb..b78026e14d 100644

> > > > > --- a/libavcodec/qsvdec.c

> > > > > +++ b/libavcodec/qsvdec.c

> > > > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > > > qsv_fifo_size(const

> > > > > AVFifoBuffer* fifo)

> > > > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > > > >  }

> > > > > 

> > > > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext

> > > > > *q)

> > > > > +static int qsv_decode_preinit(AVCodecContext *avctx,

> > > > > QSVContext

> > > > > *q,

> > > > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > > > >  {

> > > > > -    const AVPixFmtDescriptor *desc;

> > > > >      mfxSession session = NULL;

> > > > >      int iopattern = 0;

> > > > > -    mfxVideoParam param = { 0 };

> > > > > -    int frame_width  = avctx->coded_width;

> > > > > -    int frame_height = avctx->coded_height;

> > > > >      int ret;

> > > > > 

> > > > > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > > > > -    if (!desc)

> > > > > -        return AVERROR_BUG;

> > > > > +    ret = ff_get_format(avctx, pix_fmts);

> > > > > +    if (ret < 0) {

> > > > > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > > > > +        return ret;

> > > > > +    }

> > > > > 

> > > > >      if (!q->async_fifo) {

> > > > >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > > > > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > > > > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > > >          return ret;

> > > > >      }

> > > > > 

> > > > > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > > > -    if (ret < 0)

> > > > > -        return ret;

> > > > > +    param->IOPattern   = q->iopattern;

> > > > > +    param->AsyncDepth  = q->async_depth;

> > > > > +    param->ExtParam    = q->ext_buffers;

> > > > > +    param->NumExtParam = q->nb_ext_buffers;

> > > > > 

> > > > > -    param.mfx.CodecId      = ret;

> > > > > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > > > > > codec_id,

> > > > > 

> > > > > avctx->profile);

> > > > > -    param.mfx.CodecLevel   = avctx->level ==

> > > > 

> > > > FF_LEVEL_UNKNOWN ?

> > > > > MFX_LEVEL_UNKNOWN : avctx->level;

> > > > > -

> > > > > -    param.mfx.FrameInfo.BitDepthLuma   =

> > 

> > desc->comp[0].depth;

> > > > > -    param.mfx.FrameInfo.BitDepthChroma = desc-

> > > > > >comp[0].depth;

> > > > > -    param.mfx.FrameInfo.Shift          =

> > 

> > desc->comp[0].depth >

> > > > 

> > > > 8;

> > > > > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > > > > -    param.mfx.FrameInfo.Width          = frame_width;

> > > > > -    param.mfx.FrameInfo.Height         = frame_height;

> > > > > -    param.mfx.FrameInfo.ChromaFormat   =

> > > > 

> > > > MFX_CHROMAFORMAT_YUV420;

> > > > > -

> > > > > -    switch (avctx->field_order) {

> > > > > -    case AV_FIELD_PROGRESSIVE:

> > > > > -        param.mfx.FrameInfo.PicStruct =

> > > > 

> > > > MFX_PICSTRUCT_PROGRESSIVE;

> > > > > -        break;

> > > > > -    case AV_FIELD_TT:

> > > > > -        param.mfx.FrameInfo.PicStruct =

> > > > 

> > > > MFX_PICSTRUCT_FIELD_TFF;

> > > > > -        break;

> > > > > -    case AV_FIELD_BB:

> > > > > -        param.mfx.FrameInfo.PicStruct =

> > > > 

> > > > MFX_PICSTRUCT_FIELD_BFF;

> > > > > -        break;

> > > > > -    default:

> > > > > -        param.mfx.FrameInfo.PicStruct =

> > > > 

> > > > MFX_PICSTRUCT_UNKNOWN;

> > > > > -        break;

> > > > > -    }

> > > > > +    return 0;

> > > > > + }

> > > > > 

> > > > > -    param.IOPattern   = q->iopattern;

> > > > > -    param.AsyncDepth  = q->async_depth;

> > > > > -    param.ExtParam    = q->ext_buffers;

> > > > > -    param.NumExtParam = q->nb_ext_buffers;

> > > > > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext

> > > > > *q,

> > > > > mfxVideoParam *param)

> > > > > +{

> > > > > +    int ret;

> > > > > 

> > > > > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > > > > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > > > > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > > > > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > > > > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > > > > +    avctx->level        = param->mfx.CodecLevel;

> > > > > +    avctx->profile      = param->mfx.CodecProfile;

> > > > > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > > > > > mfx.FrameInfo.PicStruct);

> > > > > 

> > > > > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > > > > > mfx.FrameInfo.FourCC);

> > > > > 

> > > > > +

> > > > > +    ret = MFXVideoDECODE_Init(q->session, param);

> > > > >      if (ret < 0)

> > > > >          return ff_qsv_print_error(avctx, ret,

> > > > >                                    "Error initializing

> > 

> > the

> > > > 

> > > > MFX video

> > > > > decoder");

> > > > > 

> > > > > -    q->frame_info = param.mfx.FrameInfo;

> > > > > +    q->frame_info = param->mfx.FrameInfo;

> > > > > +

> > > > > +    return 0;

> > > > > +}

> > > > > +

> > > > > +static int qsv_decode_header(AVCodecContext *avctx,

> > > > > QSVContext

> > > > > *q,

> > > > > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> > > > > *param)

> > > > > +{

> > > > > +    int ret;

> > > > > +

> > > > > +    mfxBitstream bs = { { { 0 } } };

> > > > > +

> > > > > +    if (avpkt->size) {

> > > > > +        bs.Data       = avpkt->data;

> > > > > +        bs.DataLength = avpkt->size;

> > > > > +        bs.MaxLength  = bs.DataLength;

> > > > > +        bs.TimeStamp  = avpkt->pts;

> > > > > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > > > > +            bs.DataFlag   |=

> > > > 

> > > > MFX_BITSTREAM_COMPLETE_FRAME;

> > > > > +    } else

> > > > > +        return AVERROR_INVALIDDATA;

> > > > > +

> > > > > +

> > > > > +    if(!q->session) {

> > > > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > > > > +        if (ret < 0)

> > > > > +            return ret;

> > > > > +    }

> > > > > +

> > > > > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > > > +    if (ret < 0)

> > > > > +        return ret;

> > > > > +

> > > > > +    param->mfx.CodecId = ret;

> > > > > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs,

> > 

> > param);

> > > > > +    if (MFX_ERR_MORE_DATA == ret) {

> > > > > +       return AVERROR(EAGAIN);

> > > > > +    }

> > > > > +    if (ret < 0)

> > > > > +        return ff_qsv_print_error(avctx, ret,

> > > > > +                "Error decoding stream header");

> > > > > 

> > > > >      return 0;

> > > > >  }

> > > > > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext

> > > > > *avctx,

> > > > > QSVContext *q,

> > > > >      uint8_t *dummy_data;

> > > > >      int dummy_size;

> > > > >      int ret;

> > > > > -    const AVPixFmtDescriptor *desc;

> > > > > +    mfxVideoParam param = { 0 };

> > > > > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > > > 

> > > > 

> > > > 

> > 

> > +                                       AV_PIX_FMT_NV

> > > > 12,

> > > > > 

> > > > 

> > > > 

> > 

> > +                                       AV_PIX_FMT_N

> > > > ONE };

> > > > > 

> > > > >      if (!q->avctx_internal) {

> > > > >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> > > > 

> > > > -508,7

> > > > > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > > > > QSVContext

> > > > > *q,

> > > > >              return AVERROR(ENOMEM);

> > > > > 

> > > > >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > > > > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> > > > >      }

> > > > > 

> > > > >      if (!pkt->size)

> > > > > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> > > > 

> > > > *avctx,

> > > > > QSVContext *q,

> > > > >                       pkt->data, pkt->size, pkt->pts,

> > 

> > pkt->dts,

> > > > >                       pkt->pos);

> > > > > 

> > > > > -    avctx->field_order  = q->parser->field_order;

> > > > >      /* TODO: flush delayed frames on reinit */

> > > > > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > > > > -        FFALIGN(q->parser->coded_width, 16)  !=

> > 

> > FFALIGN(avctx-

> > > > > > coded_width, 16) ||

> > > > > 

> > > > > -        FFALIGN(q->parser->coded_height, 16) !=

> > > > > FFALIGN(avctx-

> > > > > > coded_height, 16)) {

> > > > > 

> > > > > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > > > 

> > > > 

> > > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE,

> > > > > 

> > > > 

> > > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE };

> > > > > -        enum AVPixelFormat qsv_format;

> > > > > +

> > > > > +    // sw_pix_fmt was initialized as NV12, will be updated

> > > > > after

> > > > > header decoded if not true.

> > > > > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > > > > +        pix_fmts[1] = q->orig_pix_fmt;

> > > > 

> > > > Well, it still seems non self-explanatory to me why we have

> > > > pix_fmts[1] here.

> > > 

> > > I think it has been explained and descripted in my comment. It

> > > means

> > > sw_pix_fmt.

> > > For more detail, probably you want to check the implementation of

> > > ff_get_format().

> > > 

> > > > > +

> > > > > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts,

> > > > > &param);

> > > > 

> > > > Parsing can be expensive to perform on each frame. See more

> > > > below.

> > > 

> > > 1. Parsing is just to reading header bits, I don't think it is

> > > CPU

> > > heavy calculation task, compared with slice level decoding.

> > > 2. Mostly only key frames have completed header. For non-key

> > > frame,

> > > just return MFX_ERR_MORE_DATA instead of real parsing work.

> > 

> > 

> > That was long when I looked myself in details of DecodeHeader, but

> > I think

> > that what the following will happen if frame is missing a header

> > and you feed

> > it into mediasdk: mediasdk will read the whole bitstream to the end

> > trying to

> > find a header. Once it will fail to find it mediasdk will return

> > MFX_ERR_MORE_DATA. I think you will be able to see that if you will

> > print

> > mfxBitstream::DataOffset and mfxBitstream::DataLength before and

> > after

> > your call for MFXVideoDECODE_DecodeHeader(). So, mediasdk actually

> > changes DataOffset either pointing to the beginning of the header

> > or right

> > after the header (I don't remember exactly, sorry). If header not

> > found  it

> > sets DataOffset=DataLength and DataLength=0 requesting new

> > bitstream

> > portion. The reason why you think everything is ok is that you

> > construct

> > mfxBitstream structure anew each time in qsv_decode_header.

> > Hence, you just don't notice that mediasdk performs a massive

> > parsing.

> > 

> > I afraid that as is your change will result in massive CPU%

> > regressions.

> 

> That is a good point, if massive CPU% regressions really happens,

> definitely it should be taken seriously. 

> Then I compared the performance and CPU usage with and without this

> patch. I can't find obvious performance drop or higher CPU usage, at

> least for h264. 

> And I take a little deep to check how MSDK search start code: https:/

> /github.com/Intel-Media-

> SDK/MediaSDK/blob/master/_studio/shared/umc/codec/h264_dec/src/umc_h2

> 64_nal_spl.cpp#L319 

> If MSDK can find the start code, not matter SPS/PPS or slice header

> start code, it will end the search. 

> Yes, mostly for non-key frame, there is no SPS/PPS. But slice header

> is still exist, thus mean MSDK don't need to search the whole

> packet. 

> For case of AnnexC, ffmpeg-qsv will convert it to be AnnexB: https://

> github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsvdec_h2645.c#L257,

> that can make sure start code is always existed. 

> 

> If you agree above, I would like to keep current implementation and

> push this patch, else it will pending the MJPEG/VP9 decoding patches.

> 

> 


Dmitry Gurulev, can you please help answer these questions?
Rogozhkin, Dmitry V March 16, 2019, 1:18 a.m. UTC | #10
On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:
> > -----Original Message-----

> > From: Rogozhkin, Dmitry V

> > Sent: Saturday, March 9, 2019 8:48 AM

> > To: ffmpeg-devel@ffmpeg.org

> > Cc: Li, Zhong <zhong.li@intel.com>

> > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > current

> > parser with MFXVideoDECODE_DecodeHeader()

> > 

> > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > Using MSDK parser can improve qsv decoder pass rate in some cases

> > > (E.g:

> > > sps declares a wrong level_idc, smaller than it should be).

> > > And it is necessary for adding new qsv decoders such as MJPEG and

> > > VP9

> > > since current parser can't provide enough information.

> > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html

> > > and

> > > merged as commit 1acb19d, but was overwritten when merged libav

> > > patches (commit: 1f26a23) without any explain.

> > > 

> > > v2: split decode header from decode_init, and call it for

> > > everyframe

> > > to detect format/resoultion change. It can fix some regression

> > > issues

> > > such as hevc 10bits decoding.

> > > 

> > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > ---

> > >  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++----------

> > > ----

> > > ----

> > >  libavcodec/qsvdec.h       |   2 +

> > >  libavcodec/qsvdec_h2645.c |   1 +

> > >  libavcodec/qsvdec_other.c |   1 +

> > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > 

> > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > 4a0be811fb..b78026e14d 100644

> > > --- a/libavcodec/qsvdec.c

> > > +++ b/libavcodec/qsvdec.c

> > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > qsv_fifo_size(const

> > > AVFifoBuffer* fifo)

> > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > >  }

> > > 

> > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext

> > > *q,

> > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > >  {

> > > -    const AVPixFmtDescriptor *desc;

> > >      mfxSession session = NULL;

> > >      int iopattern = 0;

> > > -    mfxVideoParam param = { 0 };

> > > -    int frame_width  = avctx->coded_width;

> > > -    int frame_height = avctx->coded_height;

> > >      int ret;

> > > 

> > > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > > -    if (!desc)

> > > -        return AVERROR_BUG;

> > > +    ret = ff_get_format(avctx, pix_fmts);

> > > +    if (ret < 0) {

> > > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > > +        return ret;

> > > +    }

> > > 

> > >      if (!q->async_fifo) {

> > >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > >          return ret;

> > >      }

> > > 

> > > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > -    if (ret < 0)

> > > -        return ret;

> > > +    param->IOPattern   = q->iopattern;

> > > +    param->AsyncDepth  = q->async_depth;

> > > +    param->ExtParam    = q->ext_buffers;

> > > +    param->NumExtParam = q->nb_ext_buffers;

> > > 

> > > -    param.mfx.CodecId      = ret;

> > > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > > >codec_id,

> > > avctx->profile);

> > > -    param.mfx.CodecLevel   = avctx->level ==

> > 

> > FF_LEVEL_UNKNOWN ?

> > > MFX_LEVEL_UNKNOWN : avctx->level;

> > > -

> > > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth >

> > 

> > 8;

> > > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > > -    param.mfx.FrameInfo.Width          = frame_width;

> > > -    param.mfx.FrameInfo.Height         = frame_height;

> > > -    param.mfx.FrameInfo.ChromaFormat   =

> > 

> > MFX_CHROMAFORMAT_YUV420;

> > > -

> > > -    switch (avctx->field_order) {

> > > -    case AV_FIELD_PROGRESSIVE:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_PROGRESSIVE;

> > > -        break;

> > > -    case AV_FIELD_TT:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_FIELD_TFF;

> > > -        break;

> > > -    case AV_FIELD_BB:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_FIELD_BFF;

> > > -        break;

> > > -    default:

> > > -        param.mfx.FrameInfo.PicStruct =

> > 

> > MFX_PICSTRUCT_UNKNOWN;

> > > -        break;

> > > -    }

> > > +    return 0;

> > > + }

> > > 

> > > -    param.IOPattern   = q->iopattern;

> > > -    param.AsyncDepth  = q->async_depth;

> > > -    param.ExtParam    = q->ext_buffers;

> > > -    param.NumExtParam = q->nb_ext_buffers;

> > > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > > mfxVideoParam *param)

> > > +{

> > > +    int ret;

> > > 

> > > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > > +    avctx->level        = param->mfx.CodecLevel;

> > > +    avctx->profile      = param->mfx.CodecProfile;

> > > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > > > mfx.FrameInfo.PicStruct);

> > > 

> > > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > > > mfx.FrameInfo.FourCC);

> > > 

> > > +

> > > +    ret = MFXVideoDECODE_Init(q->session, param);

> > >      if (ret < 0)

> > >          return ff_qsv_print_error(avctx, ret,

> > >                                    "Error initializing the

> > 

> > MFX video

> > > decoder");

> > > 

> > > -    q->frame_info = param.mfx.FrameInfo;

> > > +    q->frame_info = param->mfx.FrameInfo;

> > > +

> > > +    return 0;

> > > +}

> > > +

> > > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext

> > > *q,

> > > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> > > *param)

> > > +{

> > > +    int ret;

> > > +

> > > +    mfxBitstream bs = { { { 0 } } };

> > > +

> > > +    if (avpkt->size) {

> > > +        bs.Data       = avpkt->data;

> > > +        bs.DataLength = avpkt->size;

> > > +        bs.MaxLength  = bs.DataLength;

> > > +        bs.TimeStamp  = avpkt->pts;

> > > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > > +            bs.DataFlag   |=

> > 

> > MFX_BITSTREAM_COMPLETE_FRAME;

> > > +    } else

> > > +        return AVERROR_INVALIDDATA;

> > > +

> > > +

> > > +    if(!q->session) {

> > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > > +        if (ret < 0)

> > > +            return ret;

> > > +    }

> > > +

> > > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > +    if (ret < 0)

> > > +        return ret;

> > > +

> > > +    param->mfx.CodecId = ret;

> > > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> > > +    if (MFX_ERR_MORE_DATA == ret) {

> > > +       return AVERROR(EAGAIN);

> > > +    }

> > > +    if (ret < 0)

> > > +        return ff_qsv_print_error(avctx, ret,

> > > +                "Error decoding stream header");

> > > 

> > >      return 0;

> > >  }

> > > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext

> > > *avctx,

> > > QSVContext *q,

> > >      uint8_t *dummy_data;

> > >      int dummy_size;

> > >      int ret;

> > > -    const AVPixFmtDescriptor *desc;

> > > +    mfxVideoParam param = { 0 };

> > > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > 

> > 

> > +                                       AV_PIX_FMT_NV

> > 12,

> > > 

> > 

> > +                                       AV_PIX_FMT_N

> > ONE };

> > > 

> > >      if (!q->avctx_internal) {

> > >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> > 

> > -508,7

> > > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > > QSVContext

> > > *q,

> > >              return AVERROR(ENOMEM);

> > > 

> > >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> > >      }

> > > 

> > >      if (!pkt->size)

> > > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> > 

> > *avctx,

> > > QSVContext *q,

> > >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> > >                       pkt->pos);

> > > 

> > > -    avctx->field_order  = q->parser->field_order;

> > >      /* TODO: flush delayed frames on reinit */

> > > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > > -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> > > > coded_width, 16) ||

> > > 

> > > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > > > coded_height, 16)) {

> > > 

> > > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE,

> > > 

> > 

> > -                                           AV_PIX_F

> > MT_NONE };

> > > -        enum AVPixelFormat qsv_format;

> > > +

> > > +    // sw_pix_fmt was initialized as NV12, will be updated after

> > > header decoded if not true.

> > > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > > +        pix_fmts[1] = q->orig_pix_fmt;

> > 

> > Well, it still seems non self-explanatory to me why we have

> > pix_fmts[1] here.

> 

> I think it has been explained and descripted in my comment. It means

> sw_pix_fmt.

> For more detail, probably you want to check the implementation of

> ff_get_format().

> 

> > > +

> > > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> > 

> > Parsing can be expensive to perform on each frame. See more below.

> 

> 1. Parsing is just to reading header bits, I don't think it is CPU

> heavy calculation task, compared with slice level decoding. 

> 2. Mostly only key frames have completed header. For non-key frame,

> just return MFX_ERR_MORE_DATA instead of real parsing work. 

> 

> > > +

> > > +    if (ret >= 0 && (q->orig_pix_fmt !=

> > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> > > +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> > > +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

> > >          AVPacket zero_pkt = {0};

> > 

> > You are actually trying to perform the task which is being done by

> > mediasdk.

> > Moreover, you don't detect all the cases when mediasdk decoder will

> > require

> > to be reset. For example, you may have 2 streams concatenated which

> > have

> > perfectly the same resolution, but still decoder reset will be

> > required because

> > of profile/level/gop structure changes and memory reallocation or

> > better to

> > say additional allocation could also be required because mediasdk

> > may

> > require different (bigger or lesser) number of surfaces to properly

> > handle

> > new bitstream. See more below.

> 

> Which case of gop structure changing need to reset? Could you please,

> give an example.


Dmitry Gurulev, can you please help answer these questions?

> 

> > > 

> > >          if (q->buffered_count) {

> > > @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext

> > 

> > *avctx,

> > > QSVContext *q,

> > >              q->buffered_count--;

> > >              return qsv_decode(avctx, q, frame, got_frame,

> > 

> > &zero_pkt);

> > >          }

> > > -

> > >          q->reinit_flag = 0;

> > > 

> > > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> > > > fourcc);

> > > 

> > > -        if (qsv_format < 0) {

> > > -            av_log(avctx, AV_LOG_ERROR,

> > > -                   "Decoding pixel format '%s' is not

> > 

> > supported\n",

> > > -                   av_get_pix_fmt_name(q->parser->format));

> > > -            ret = AVERROR(ENOSYS);

> > > -            goto reinit_fail;

> > > -        }

> > > +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] =

> > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> > > 

> > > -        q->orig_pix_fmt     = q->parser->format;

> > > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > > -        avctx->width        = q->parser->width;

> > > -        avctx->height       = q->parser->height;

> > > -        avctx->coded_width  = FFALIGN(q->parser->coded_width,

> > 

> > 16);

> > > -        avctx->coded_height = FFALIGN(q->parser->coded_height,

> > 

> > 16);

> > > -        avctx->level        = q->avctx_internal->level;

> > > -        avctx->profile      = q->avctx_internal->profile;

> > > +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> > > +        avctx->coded_height = param.mfx.FrameInfo.Height;

> > > 

> > > -        ret = ff_get_format(avctx, pix_fmts);

> > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

> > >          if (ret < 0)

> > >              goto reinit_fail;

> > > +        q->initialized = 0;

> > > +    }

> > > 

> > > -        avctx->pix_fmt = ret;

> > > -

> > > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > > -        if (!desc)

> > > -            goto reinit_fail;

> > > -

> > > -         if (desc->comp[0].depth > 8) {

> > > -            avctx->coded_width

> > 

> > =  FFALIGN(q->parser->coded_width,

> > > 32);

> > > -            avctx->coded_height =

> > 

> > FFALIGN(q->parser->coded_height,

> > > 32);

> > > -        }

> > > -

> > > -        ret = qsv_decode_init(avctx, q);

> > > +    if (!q->initialized) {

> > > +        ret = qsv_decode_init(avctx, q, &param);

> > >          if (ret < 0)

> > >              goto reinit_fail;

> > > +        q->initialized = 1;

> > >      }

> > > 

> > >      return qsv_decode(avctx, q, frame, got_frame, pkt);

> > 

> > So, from my perspective you should not attempt to call decode

> > header on

> > each incoming data portion and try to interpret results. Instead

> > you should

> > respect what mediasdk DecodeFrameAsync is returning. Basically it

> > will

> > return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM is decoder reset is

> > required.

> > This gives you 2 situations when you should call

> > DecodeHeader:

> > 1. You either call it if decoder is not initalized at all 2. Or you

> > call it upon

> > receiving MFX_ERR_INCOMPATIBLE_VIDEO_PARAM

> > 

> > Once you received MFX_ERR_INCOMPATIBLE_VIDEO_PARAM step to

> > perform are:

> > 1. Flush cached frames from medisdk decoder passing nullptr

> > bitstream to

> > DecodeFrameAsync 2. Run decode header and get updated parameters 3.

> > Check (via QueryIOSurf) and potentially reallocate surfaces to

> > satisfy new

> > bitstream requirement 4. Reset decoder with new parameters

> 

> 1. This patch is just to replace original parser to MSDK parser. I

> don't like to mix other thing here (thus probably introduces new

> bugs, but full round testing has been done for this patch).

>   If it is really necessary, should be done by an additional patch. 

> 2. Unfortunately, if we don't reset and set correct

> resolution/pix_fmt, FFmpeg-qsv already failed in the progress of MSDK

> initialization (e,g: pass a NV12 format but actually it is P010. see

> : https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsv.c#L424

> )

>   There is no opportunity to get the status of DecodeFrameAsync at

> these case. I haven't compared MSDK sample decoder with current

> FFmpeg (Will take a look later). But IIRC, MSDK sample decoder isn't

> doing a better job than ffmpeg-qsv for reinit case.

>   (one example, it is hang when run sample_decode h264 -i /fate-

> suite/h264/reinit-large_420_8-to-small_420_8.h264 -r , and this clip

> is changing from a larger resolution to a smaller one. I wonder to

> know what will happen if change resolution from smaller to a larger,

> eg, 720x480 to 1920x1080).

> 3. IMHO, reinit work is better to do before decoding, instead of

> decoding it and recover it after it is failed. 

> 4. This a common function for all decoders, I would like to see it is

> under control of ffmpeg. MSDK may have no unified implementation of

> all codecs ( e.g: https://github.com/Intel-Media-SDK/MediaSDK/issues/

> 333 ).

>   Your suggestion may be addressed only when I see any new issues or

> bugs. But currently I can't see the necessity. 

> 

> > Application may attempt to optimize procedure on step 3 above and

> > avoid

> > surfaces reallocation if possible. For example, if you have

> > allocated

> > 1920x1080 surfaces and resolution changed to 720x480, you may

> > actually

> > stay on 1920x1080 if you are ok to use more memory hoping to

> > receive

> > 1920x1080 stream in the future again. Eventually this would work if

> > you have

> > enough number of 1920x1080 surfaces.

> 

> Hm, this is a simpler topic, https://patchwork.ffmpeg.org/patch/12257

> / is to prepare this though decoder should be updated too. But as

> previous discussion, thus should be a separated patch and need pre-

> testing.
Mark Thompson March 17, 2019, 2:20 p.m. UTC | #11
On 08/03/2019 07:40, Zhong Li wrote:
> Using MSDK parser can improve qsv decoder pass rate in some cases (E.g:
> sps declares a wrong level_idc, smaller than it should be).
> And it is necessary for adding new qsv decoders such as MJPEG and VP9
> since current parser can't provide enough information.
> Actually using MFXVideoDECODE_DecodeHeader() was discussed at
> https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and merged as commit 1acb19d,
> but was overwritten when the libav patches were merged (commit: 1f26a23) without any explanation.
> 
> v2: split decode header from decode_init, and call it for every frame to
> detect format/resolution changes. It can fix some regression issues such
> as hevc 10-bit decoding.
> 
> Signed-off-by: Zhong Li <zhong.li@intel.com>
> ---
>  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++------------------
>  libavcodec/qsvdec.h       |   2 +
>  libavcodec/qsvdec_h2645.c |   1 +
>  libavcodec/qsvdec_other.c |   1 +
>  4 files changed, 93 insertions(+), 83 deletions(-)
> 
> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
> index 4a0be811fb..b78026e14d 100644
> --- a/libavcodec/qsvdec.c
> +++ b/libavcodec/qsvdec.c
> @@ -120,19 +120,17 @@ static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
>      return av_fifo_size(fifo) / qsv_fifo_item_size();
>  }
>  
> -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
> +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
>  {
> -    const AVPixFmtDescriptor *desc;
>      mfxSession session = NULL;
>      int iopattern = 0;
> -    mfxVideoParam param = { 0 };
> -    int frame_width  = avctx->coded_width;
> -    int frame_height = avctx->coded_height;
>      int ret;
>  
> -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> -    if (!desc)
> -        return AVERROR_BUG;
> +    ret = ff_get_format(avctx, pix_fmts);
> +    if (ret < 0) {
> +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;
> +        return ret;
> +    }
>  
>      if (!q->async_fifo) {
>          q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
> @@ -170,48 +168,72 @@ static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
>          return ret;
>      }
>  
> -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
> -    if (ret < 0)
> -        return ret;
> +    param->IOPattern   = q->iopattern;
> +    param->AsyncDepth  = q->async_depth;
> +    param->ExtParam    = q->ext_buffers;
> +    param->NumExtParam = q->nb_ext_buffers;
>  
> -    param.mfx.CodecId      = ret;
> -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id, avctx->profile);
> -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ? MFX_LEVEL_UNKNOWN : avctx->level;
> -
> -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;
> -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;
> -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;
> -    param.mfx.FrameInfo.FourCC         = q->fourcc;
> -    param.mfx.FrameInfo.Width          = frame_width;
> -    param.mfx.FrameInfo.Height         = frame_height;
> -    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
> -
> -    switch (avctx->field_order) {
> -    case AV_FIELD_PROGRESSIVE:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
> -        break;
> -    case AV_FIELD_TT:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;
> -        break;
> -    case AV_FIELD_BB:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;
> -        break;
> -    default:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;
> -        break;
> -    }
> +    return 0;
> + }
>  
> -    param.IOPattern   = q->iopattern;
> -    param.AsyncDepth  = q->async_depth;
> -    param.ExtParam    = q->ext_buffers;
> -    param.NumExtParam = q->nb_ext_buffers;
> +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, mfxVideoParam *param)
> +{
> +    int ret;
>  
> -    ret = MFXVideoDECODE_Init(q->session, &param);
> +    avctx->width        = param->mfx.FrameInfo.CropW;
> +    avctx->height       = param->mfx.FrameInfo.CropH;
> +    avctx->coded_width  = param->mfx.FrameInfo.Width;
> +    avctx->coded_height = param->mfx.FrameInfo.Height;
> +    avctx->level        = param->mfx.CodecLevel;
> +    avctx->profile      = param->mfx.CodecProfile;

This feels insufficient for the UNKNOWN cases?  (VP8 in particular has no profiles.)

> +    avctx->field_order  = ff_qsv_map_picstruct(param->mfx.FrameInfo.PicStruct);
> +    avctx->pix_fmt      = ff_qsv_map_fourcc(param->mfx.FrameInfo.FourCC);
> +
> +    ret = MFXVideoDECODE_Init(q->session, param);
>      if (ret < 0)
>          return ff_qsv_print_error(avctx, ret,
>                                    "Error initializing the MFX video decoder");
>  
> -    q->frame_info = param.mfx.FrameInfo;
> +    q->frame_info = param->mfx.FrameInfo;
> +
> +    return 0;
> +}
> +
> +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
> +{
> +    int ret;
> +
> +    mfxBitstream bs = { { { 0 } } };

{ 0 }

> +
> +    if (avpkt->size) {
> +        bs.Data       = avpkt->data;
> +        bs.DataLength = avpkt->size;
> +        bs.MaxLength  = bs.DataLength;
> +        bs.TimeStamp  = avpkt->pts;
> +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)
> +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;
> +    } else
> +        return AVERROR_INVALIDDATA;
> +
> +
> +    if(!q->session) {
> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

I think you've misunderstood some of the mechanism of the get_format callback.  It can't be called before you have parsed out the stream information, because you don't know the necessary size and format information for the user to allocate the frames.  (Note that some of this is hinted if you use libavformat and copy the codec parameters from there, leading to cases where it mostly works in the ffmpeg utility but fails completely with libavcodec-only users.)

Given the chicken-egg problem here for library initialisation, maybe what you want is to switch to using AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX?

> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
> +    if (ret < 0)
> +        return ret;
> +
> +    param->mfx.CodecId = ret;
> +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);
> +    if (MFX_ERR_MORE_DATA == ret) {
> +       return AVERROR(EAGAIN);
> +    }
> +    if (ret < 0)
> +        return ff_qsv_print_error(avctx, ret,
> +                "Error decoding stream header");
>  
>      return 0;
>  }
> @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>      uint8_t *dummy_data;
>      int dummy_size;
>      int ret;
> -    const AVPixFmtDescriptor *desc;
> +    mfxVideoParam param = { 0 };
> +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
> +                                       AV_PIX_FMT_NV12,
> +                                       AV_PIX_FMT_NONE };
>  
>      if (!q->avctx_internal) {
>          q->avctx_internal = avcodec_alloc_context3(NULL);
> @@ -508,7 +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>              return AVERROR(ENOMEM);
>  
>          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
> -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;
>      }
>  
>      if (!pkt->size)
> @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>                       pkt->data, pkt->size, pkt->pts, pkt->dts,
>                       pkt->pos);
>  
> -    avctx->field_order  = q->parser->field_order;
>      /* TODO: flush delayed frames on reinit */
> -    if (q->parser->format       != q->orig_pix_fmt    ||
> -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx->coded_width, 16) ||
> -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx->coded_height, 16)) {
> -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
> -                                           AV_PIX_FMT_NONE,
> -                                           AV_PIX_FMT_NONE };
> -        enum AVPixelFormat qsv_format;
> +
> +    // sw_pix_fmt was initialized as NV12, will be updated after header decoded if not true.
> +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)
> +        pix_fmts[1] = q->orig_pix_fmt;
> +
> +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);
> +
> +    if (ret >= 0 && (q->orig_pix_fmt != ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||
> +        avctx->coded_width  != param.mfx.FrameInfo.Width ||
> +        avctx->coded_height != param.mfx.FrameInfo.Height)) {
>          AVPacket zero_pkt = {0};
>  
>          if (q->buffered_count) {
> @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>              q->buffered_count--;
>              return qsv_decode(avctx, q, frame, got_frame, &zero_pkt);
>          }
> -
>          q->reinit_flag = 0;
>  
> -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q->fourcc);
> -        if (qsv_format < 0) {
> -            av_log(avctx, AV_LOG_ERROR,
> -                   "Decoding pixel format '%s' is not supported\n",
> -                   av_get_pix_fmt_name(q->parser->format));
> -            ret = AVERROR(ENOSYS);
> -            goto reinit_fail;
> -        }
> +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] = ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);
>  
> -        q->orig_pix_fmt     = q->parser->format;
> -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;
> -        avctx->width        = q->parser->width;
> -        avctx->height       = q->parser->height;
> -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);
> -        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);
> -        avctx->level        = q->avctx_internal->level;
> -        avctx->profile      = q->avctx_internal->profile;
> +        avctx->coded_width  = param.mfx.FrameInfo.Width;
> +        avctx->coded_height = param.mfx.FrameInfo.Height;
>  
> -        ret = ff_get_format(avctx, pix_fmts);
> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);
>          if (ret < 0)
>              goto reinit_fail;
> +        q->initialized = 0;
> +    }
>  
> -        avctx->pix_fmt = ret;
> -
> -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);
> -        if (!desc)
> -            goto reinit_fail;
> -
> -         if (desc->comp[0].depth > 8) {
> -            avctx->coded_width =  FFALIGN(q->parser->coded_width, 32);
> -            avctx->coded_height = FFALIGN(q->parser->coded_height, 32);
> -        }
> -
> -        ret = qsv_decode_init(avctx, q);
> +    if (!q->initialized) {
> +        ret = qsv_decode_init(avctx, q, &param);
>          if (ret < 0)
>              goto reinit_fail;
> +        q->initialized = 1;
>      }
>  
>      return qsv_decode(avctx, q, frame, got_frame, pkt);
> @@ -589,4 +594,5 @@ reinit_fail:
>  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)
>  {
>      q->orig_pix_fmt = AV_PIX_FMT_NONE;
> +    q->initialized = 0;
>  }
> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
> index 111536caba..4812fb2a6b 100644
> --- a/libavcodec/qsvdec.h
> +++ b/libavcodec/qsvdec.h
> @@ -63,6 +63,8 @@ typedef struct QSVContext {
>      uint32_t fourcc;
>      mfxFrameInfo frame_info;
>  
> +    int initialized;
> +
>      // options set by the caller
>      int async_depth;
>      int iopattern;
> diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
> index 9b49f5506e..eb1dc336a4 100644
> --- a/libavcodec/qsvdec_h2645.c
> +++ b/libavcodec/qsvdec_h2645.c
> @@ -103,6 +103,7 @@ static av_cold int qsv_decode_init(AVCodecContext *avctx)
>          }
>      }
>  
> +    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;

This setting looks very suspicious?  The real value is in the stream header, and you don't want to do anything with pixfmts until you've read it.

>      s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));
>      if (!s->packet_fifo) {
>          ret = AVERROR(ENOMEM);
> diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
> index 03251d2c85..a6f1b88ca0 100644
> --- a/libavcodec/qsvdec_other.c
> +++ b/libavcodec/qsvdec_other.c
> @@ -90,6 +90,7 @@ static av_cold int qsv_decode_init(AVCodecContext *avctx)
>      }
>  #endif
>  
> +    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;
>      s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));
>      if (!s->packet_fifo) {
>          ret = AVERROR(ENOMEM);
> 

I have to admit I'm still sceptical that this change is useful.  It seems like it would be better to fix up your cases where libmfx barfs on streams with bad profile or level information, and then to add the parser support for your additional codecs.

- Mark
Zhong Li March 18, 2019, 3:22 a.m. UTC | #12
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Mark Thompson

> Sent: Sunday, March 17, 2019 10:20 PM

> To: ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace current

> parser with MFXVideoDECODE_DecodeHeader()

> 

> On 08/03/2019 07:40, Zhong Li wrote:

> > Using MSDK parser can improve qsv decoder pass rate in some cases (E.g:

> > sps declares a wrong level_idc, smaller than it should be).

> > And it is necessary for adding new qsv decoders such as MJPEG and VP9

> > since current parser can't provide enough information.

> > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> > merged as commit 1acb19d, but was overwritten when merged libav

> patches (commit: 1f26a23) without any explain.

> >

> > v2: split decode header from decode_init, and call it for everyframe

> > to detect format/resoultion change. It can fix some regression issues

> > such as hevc 10bits decoding.

> >

> > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > ---

> >  libavcodec/qsvdec.c       | 172

> ++++++++++++++++++++------------------

> >  libavcodec/qsvdec.h       |   2 +

> >  libavcodec/qsvdec_h2645.c |   1 +

> >  libavcodec/qsvdec_other.c |   1 +

> >  4 files changed, 93 insertions(+), 83 deletions(-)

> >

> > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > 4a0be811fb..b78026e14d 100644

> > --- a/libavcodec/qsvdec.c

> > +++ b/libavcodec/qsvdec.c

> > @@ -120,19 +120,17 @@ static inline unsigned int qsv_fifo_size(const

> AVFifoBuffer* fifo)

> >      return av_fifo_size(fifo) / qsv_fifo_item_size();  }

> >

> > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q,

> > +enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> >  {

> > -    const AVPixFmtDescriptor *desc;

> >      mfxSession session = NULL;

> >      int iopattern = 0;

> > -    mfxVideoParam param = { 0 };

> > -    int frame_width  = avctx->coded_width;

> > -    int frame_height = avctx->coded_height;

> >      int ret;

> >

> > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > -    if (!desc)

> > -        return AVERROR_BUG;

> > +    ret = ff_get_format(avctx, pix_fmts);

> > +    if (ret < 0) {

> > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > +        return ret;

> > +    }

> >

> >      if (!q->async_fifo) {

> >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> >          return ret;

> >      }

> >

> > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > -    if (ret < 0)

> > -        return ret;

> > +    param->IOPattern   = q->iopattern;

> > +    param->AsyncDepth  = q->async_depth;

> > +    param->ExtParam    = q->ext_buffers;

> > +    param->NumExtParam = q->nb_ext_buffers;

> >

> > -    param.mfx.CodecId      = ret;

> > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id,

> avctx->profile);

> > -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ?

> MFX_LEVEL_UNKNOWN : avctx->level;

> > -

> > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;

> > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > -    param.mfx.FrameInfo.Width          = frame_width;

> > -    param.mfx.FrameInfo.Height         = frame_height;

> > -    param.mfx.FrameInfo.ChromaFormat   =

> MFX_CHROMAFORMAT_YUV420;

> > -

> > -    switch (avctx->field_order) {

> > -    case AV_FIELD_PROGRESSIVE:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_PROGRESSIVE;

> > -        break;

> > -    case AV_FIELD_TT:

> > -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;

> > -        break;

> > -    case AV_FIELD_BB:

> > -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;

> > -        break;

> > -    default:

> > -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;

> > -        break;

> > -    }

> > +    return 0;

> > + }

> >

> > -    param.IOPattern   = q->iopattern;

> > -    param.AsyncDepth  = q->async_depth;

> > -    param.ExtParam    = q->ext_buffers;

> > -    param.NumExtParam = q->nb_ext_buffers;

> > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > +mfxVideoParam *param) {

> > +    int ret;

> >

> > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > +    avctx->level        = param->mfx.CodecLevel;

> > +    avctx->profile      = param->mfx.CodecProfile;

> 

> This feels insufficient for the UNKNOWN cases?  (VP8 in particular has no

> profiles.)

> 

> > +    avctx->field_order  =

> ff_qsv_map_picstruct(param->mfx.FrameInfo.PicStruct);

> > +    avctx->pix_fmt      =

> ff_qsv_map_fourcc(param->mfx.FrameInfo.FourCC);

> > +

> > +    ret = MFXVideoDECODE_Init(q->session, param);

> >      if (ret < 0)

> >          return ff_qsv_print_error(avctx, ret,

> >                                    "Error initializing the MFX

> video

> > decoder");

> >

> > -    q->frame_info = param.mfx.FrameInfo;

> > +    q->frame_info = param->mfx.FrameInfo;

> > +

> > +    return 0;

> > +}

> > +

> > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q,

> > +AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> *param) {

> > +    int ret;

> > +

> > +    mfxBitstream bs = { { { 0 } } };

> 

> { 0 }

> 

> > +

> > +    if (avpkt->size) {

> > +        bs.Data       = avpkt->data;

> > +        bs.DataLength = avpkt->size;

> > +        bs.MaxLength  = bs.DataLength;

> > +        bs.TimeStamp  = avpkt->pts;

> > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;

> > +    } else

> > +        return AVERROR_INVALIDDATA;

> > +

> > +

> > +    if(!q->session) {

> > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> 

> I think you've misunderstood some of the mechanism of the get_format

> callback.  It can't be called before you have parsed out the stream

> information, because you don't know the necessary size and format

> information for the user to allocate the frames.  (Note that some of this is

> hinted if you use libavformat and copy the codec parameters from there,

> leading to cases where it mostly works in the ffmpeg utility but fails

> completely with libavcodec-only users.)


Thanks for your comment. But I think this has already been discussed on the patch v2 (https://patchwork.ffmpeg.org/patch/12112/ )?
[Mark] The reason for using the internal parsers is that you need the information before libmfx is initialized at all in the hw_frames_ctx case (i.e. before the
get_format callback which will supply the hardware context information), and once you require that anyway there isn't much point in parsing things twice for the same information.

[Zhong] As I see it, there is very limited information needed before initializing a libmfx session (and we must — and only need to — init a libmfx session before calling MFXVideoDECODE_DecodeHeader()): the resolution and the pix_fmt.
As you can see from my current implementation, I don't call the internal parser before initializing the session; we assume a resolution (it may be provided by libavformat, but that is not guaranteed, per Hendrik's comment) and a pix_fmt (we assume it is NV12), and correct them after MFXVideoDECODE_DecodeHeader(). That probably means we need to init the session twice for the first decoding call (e.g. hevc/vp9 10-bit clips, or when the resolution is not provided by e.g. libavformat), but that only happens on the first call (if the header information does not change after that) and only when the assumed resolution/pix_fmt turns out to be incorrect.
I think this is more efficient than parsing twice for every decoding call, and it is not a workaround, since we still need to handle resolution/pix_fmt change cases anyway.

For libavcodec-only users, it won't fail; the actual result is that we need to call qsv_decode_preinit() twice: the first time we allocate incorrect frames, and after the header is parsed we check the format and resolution, find them wrong, and then call qsv_decode_preinit() again to ensure a correct frame allocation.

> Given the chicken-egg problem here for library initialisation, maybe what you

> want is to switch to using

> AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX?

> 

> > +        if (ret < 0)

> > +            return ret;

> > +    }

> > +

> > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > +    if (ret < 0)

> > +        return ret;

> > +

> > +    param->mfx.CodecId = ret;

> > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> > +    if (MFX_ERR_MORE_DATA == ret) {

> > +       return AVERROR(EAGAIN);

> > +    }

> > +    if (ret < 0)

> > +        return ff_qsv_print_error(avctx, ret,

> > +                "Error decoding stream header");

> >

> >      return 0;

> >  }

> > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

> >      uint8_t *dummy_data;

> >      int dummy_size;

> >      int ret;

> > -    const AVPixFmtDescriptor *desc;

> > +    mfxVideoParam param = { 0 };

> > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > +                                       AV_PIX_FMT_NV12,

> > +                                       AV_PIX_FMT_NONE };

> >

> >      if (!q->avctx_internal) {

> >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> -508,7

> > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext

> *q,

> >              return AVERROR(ENOMEM);

> >

> >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> >      }

> >

> >      if (!pkt->size)

> > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> *avctx, QSVContext *q,

> >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> >                       pkt->pos);

> >

> > -    avctx->field_order  = q->parser->field_order;

> >      /* TODO: flush delayed frames on reinit */

> > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > -        FFALIGN(q->parser->coded_width, 16)  !=

> FFALIGN(avctx->coded_width, 16) ||

> > -        FFALIGN(q->parser->coded_height, 16) !=

> FFALIGN(avctx->coded_height, 16)) {

> > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > -                                           AV_PIX_FMT_NONE,

> > -

> AV_PIX_FMT_NONE };

> > -        enum AVPixelFormat qsv_format;

> > +

> > +    // sw_pix_fmt was initialized as NV12, will be updated after header

> decoded if not true.

> > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > +        pix_fmts[1] = q->orig_pix_fmt;

> > +

> > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> > +

> > +    if (ret >= 0 && (q->orig_pix_fmt !=

> ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> > +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> > +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

> >          AVPacket zero_pkt = {0};

> >

> >          if (q->buffered_count) {

> > @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext

> *avctx, QSVContext *q,

> >              q->buffered_count--;

> >              return qsv_decode(avctx, q, frame, got_frame,

> &zero_pkt);

> >          }

> > -

> >          q->reinit_flag = 0;

> >

> > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format,

> &q->fourcc);

> > -        if (qsv_format < 0) {

> > -            av_log(avctx, AV_LOG_ERROR,

> > -                   "Decoding pixel format '%s' is not supported\n",

> > -                   av_get_pix_fmt_name(q->parser->format));

> > -            ret = AVERROR(ENOSYS);

> > -            goto reinit_fail;

> > -        }

> > +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] =

> > + ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> >

> > -        q->orig_pix_fmt     = q->parser->format;

> > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > -        avctx->width        = q->parser->width;

> > -        avctx->height       = q->parser->height;

> > -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);

> > -        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);

> > -        avctx->level        = q->avctx_internal->level;

> > -        avctx->profile      = q->avctx_internal->profile;

> > +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> > +        avctx->coded_height = param.mfx.FrameInfo.Height;

> >

> > -        ret = ff_get_format(avctx, pix_fmts);

> > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

> >          if (ret < 0)

> >              goto reinit_fail;

> > +        q->initialized = 0;

> > +    }

> >

> > -        avctx->pix_fmt = ret;

> > -

> > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > -        if (!desc)

> > -            goto reinit_fail;

> > -

> > -         if (desc->comp[0].depth > 8) {

> > -            avctx->coded_width =  FFALIGN(q->parser->coded_width,

> 32);

> > -            avctx->coded_height = FFALIGN(q->parser->coded_height,

> 32);

> > -        }

> > -

> > -        ret = qsv_decode_init(avctx, q);

> > +    if (!q->initialized) {

> > +        ret = qsv_decode_init(avctx, q, &param);

> >          if (ret < 0)

> >              goto reinit_fail;

> > +        q->initialized = 1;

> >      }

> >

> >      return qsv_decode(avctx, q, frame, got_frame, pkt); @@ -589,4

> > +594,5 @@ reinit_fail:

> >  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)  {

> >      q->orig_pix_fmt = AV_PIX_FMT_NONE;

> > +    q->initialized = 0;

> >  }

> > diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h index

> > 111536caba..4812fb2a6b 100644

> > --- a/libavcodec/qsvdec.h

> > +++ b/libavcodec/qsvdec.h

> > @@ -63,6 +63,8 @@ typedef struct QSVContext {

> >      uint32_t fourcc;

> >      mfxFrameInfo frame_info;

> >

> > +    int initialized;

> > +

> >      // options set by the caller

> >      int async_depth;

> >      int iopattern;

> > diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c

> > index 9b49f5506e..eb1dc336a4 100644

> > --- a/libavcodec/qsvdec_h2645.c

> > +++ b/libavcodec/qsvdec_h2645.c

> > @@ -103,6 +103,7 @@ static av_cold int

> qsv_decode_init(AVCodecContext *avctx)

> >          }

> >      }

> >

> > +    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;

> 

> This setting looks very suspicious?  The real value is in the stream header,

> and you don't want to do anything with pixfmts until you've read it.

> 

> >      s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));

> >      if (!s->packet_fifo) {

> >          ret = AVERROR(ENOMEM);

> > diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c

> > index 03251d2c85..a6f1b88ca0 100644

> > --- a/libavcodec/qsvdec_other.c

> > +++ b/libavcodec/qsvdec_other.c

> > @@ -90,6 +90,7 @@ static av_cold int qsv_decode_init(AVCodecContext

> *avctx)

> >      }

> >  #endif

> >

> > +    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;

> >      s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));

> >      if (!s->packet_fifo) {

> >          ret = AVERROR(ENOMEM);

> >

> 

> I have to admit I'm still sceptical that this change is useful.  It seems like it

> would be better to fix up your cases where libmfx barfs on streams with bad

> profile or level information, and then to add the parser support for your

> additional codecs.


Personally speaking, I don't think it can be supported well in the parser, at least for VP9. 
Just like our discussion on patch V2 (https://patchwork.ffmpeg.org/patch/12116/ ): 

[Mark] Similarly, you will need to extend the VP9 parser to return the relevant
information for the following patch so that it works in general rather than only in cases where the user can supply it externally.  It should be quite
straightforward; see 182cf170a544bce069c8690c90b49381150a1f10.

[Zhong] There is something different from VP8 for VP9. The VP9 frame resolution probably can't be obtained from the bitstream but may be inferred from reference frames, see: https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/vp9.c#L565 
If header parsing is separated from the decoding process, it will be a problem how to get the reference list information.

If you think that case can be resolved for VP9 parser, could you please provide more details?
Gurulev, Dmitry March 18, 2019, 6:46 a.m. UTC | #13
> -----Original Message-----

> From: Rogozhkin, Dmitry V

> Sent: Saturday, March 16, 2019 4:18 AM

> To: Li, Zhong <zhong.li@intel.com>; ffmpeg-devel@ffmpeg.org; Gurulev, Dmitry

> <dmitry.gurulev@intel.com>

> Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace current parser

> with MFXVideoDECODE_DecodeHeader()

> 

> On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:

> > > -----Original Message-----

> > > From: Rogozhkin, Dmitry V

> > > Sent: Saturday, March 9, 2019 8:48 AM

> > > To: ffmpeg-devel@ffmpeg.org

> > > Cc: Li, Zhong <zhong.li@intel.com>

> > > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > > current parser with MFXVideoDECODE_DecodeHeader()

> > >

> > > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > > Using MSDK parser can improve qsv decoder pass rate in some cases

> > > > (E.g:

> > > > sps declares a wrong level_idc, smaller than it should be).

> > > > And it is necessary for adding new qsv decoders such as MJPEG and

> > > > VP9

> > > > since current parser can't provide enough information.

> > > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html

> > > > and

> > > > merged as commit 1acb19d, but was overwritten when merged libav

> > > > patches (commit: 1f26a23) without any explain.

> > > >

> > > > v2: split decode header from decode_init, and call it for

> > > > everyframe to detect format/resoultion change. It can fix some

> > > > regression issues such as hevc 10bits decoding.

> > > >

> > > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > > ---

> > > >  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++----------

> > > > ----

> > > > ----

> > > >  libavcodec/qsvdec.h       |   2 +

> > > >  libavcodec/qsvdec_h2645.c |   1 +

> > > >  libavcodec/qsvdec_other.c |   1 +

> > > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > >

> > > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > > 4a0be811fb..b78026e14d 100644

> > > > --- a/libavcodec/qsvdec.c

> > > > +++ b/libavcodec/qsvdec.c

> > > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > > qsv_fifo_size(const

> > > > AVFifoBuffer* fifo)

> > > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > > >  }

> > > >

> > > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext

> > > > *q,

> > > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > > >  {

> > > > -    const AVPixFmtDescriptor *desc;

> > > >      mfxSession session = NULL;

> > > >      int iopattern = 0;

> > > > -    mfxVideoParam param = { 0 };

> > > > -    int frame_width  = avctx->coded_width;

> > > > -    int frame_height = avctx->coded_height;

> > > >      int ret;

> > > >

> > > > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > > > -    if (!desc)

> > > > -        return AVERROR_BUG;

> > > > +    ret = ff_get_format(avctx, pix_fmts);

> > > > +    if (ret < 0) {

> > > > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > > > +        return ret;

> > > > +    }

> > > >

> > > >      if (!q->async_fifo) {

> > > >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > > > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > > > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > >          return ret;

> > > >      }

> > > >

> > > > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > > -    if (ret < 0)

> > > > -        return ret;

> > > > +    param->IOPattern   = q->iopattern;

> > > > +    param->AsyncDepth  = q->async_depth;

> > > > +    param->ExtParam    = q->ext_buffers;

> > > > +    param->NumExtParam = q->nb_ext_buffers;

> > > >

> > > > -    param.mfx.CodecId      = ret;

> > > > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > > > >codec_id,

> > > > avctx->profile);

> > > > -    param.mfx.CodecLevel   = avctx->level ==

> > >

> > > FF_LEVEL_UNKNOWN ?

> > > > MFX_LEVEL_UNKNOWN : avctx->level;

> > > > -

> > > > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > > > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > > > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth >

> > >

> > > 8;

> > > > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > > > -    param.mfx.FrameInfo.Width          = frame_width;

> > > > -    param.mfx.FrameInfo.Height         = frame_height;

> > > > -    param.mfx.FrameInfo.ChromaFormat   =

> > >

> > > MFX_CHROMAFORMAT_YUV420;

> > > > -

> > > > -    switch (avctx->field_order) {

> > > > -    case AV_FIELD_PROGRESSIVE:

> > > > -        param.mfx.FrameInfo.PicStruct =

> > >

> > > MFX_PICSTRUCT_PROGRESSIVE;

> > > > -        break;

> > > > -    case AV_FIELD_TT:

> > > > -        param.mfx.FrameInfo.PicStruct =

> > >

> > > MFX_PICSTRUCT_FIELD_TFF;

> > > > -        break;

> > > > -    case AV_FIELD_BB:

> > > > -        param.mfx.FrameInfo.PicStruct =

> > >

> > > MFX_PICSTRUCT_FIELD_BFF;

> > > > -        break;

> > > > -    default:

> > > > -        param.mfx.FrameInfo.PicStruct =

> > >

> > > MFX_PICSTRUCT_UNKNOWN;

> > > > -        break;

> > > > -    }

> > > > +    return 0;

> > > > + }

> > > >

> > > > -    param.IOPattern   = q->iopattern;

> > > > -    param.AsyncDepth  = q->async_depth;

> > > > -    param.ExtParam    = q->ext_buffers;

> > > > -    param.NumExtParam = q->nb_ext_buffers;

> > > > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > > > mfxVideoParam *param)

> > > > +{

> > > > +    int ret;

> > > >

> > > > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > > > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > > > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > > > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > > > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > > > +    avctx->level        = param->mfx.CodecLevel;

> > > > +    avctx->profile      = param->mfx.CodecProfile;

> > > > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > > > > mfx.FrameInfo.PicStruct);

> > > >

> > > > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > > > > mfx.FrameInfo.FourCC);

> > > >

> > > > +

> > > > +    ret = MFXVideoDECODE_Init(q->session, param);

> > > >      if (ret < 0)

> > > >          return ff_qsv_print_error(avctx, ret,

> > > >                                    "Error initializing the

> > >

> > > MFX video

> > > > decoder");

> > > >

> > > > -    q->frame_info = param.mfx.FrameInfo;

> > > > +    q->frame_info = param->mfx.FrameInfo;

> > > > +

> > > > +    return 0;

> > > > +}

> > > > +

> > > > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext

> > > > *q,

> > > > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> > > > *param)

> > > > +{

> > > > +    int ret;

> > > > +

> > > > +    mfxBitstream bs = { { { 0 } } };

> > > > +

> > > > +    if (avpkt->size) {

> > > > +        bs.Data       = avpkt->data;

> > > > +        bs.DataLength = avpkt->size;

> > > > +        bs.MaxLength  = bs.DataLength;

> > > > +        bs.TimeStamp  = avpkt->pts;

> > > > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > > > +            bs.DataFlag   |=

> > >

> > > MFX_BITSTREAM_COMPLETE_FRAME;

> > > > +    } else

> > > > +        return AVERROR_INVALIDDATA;

> > > > +

> > > > +

> > > > +    if(!q->session) {

> > > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > > > +        if (ret < 0)

> > > > +            return ret;

> > > > +    }

> > > > +

> > > > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > > +    if (ret < 0)

> > > > +        return ret;

> > > > +

> > > > +    param->mfx.CodecId = ret;

> > > > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> > > > +    if (MFX_ERR_MORE_DATA == ret) {

> > > > +       return AVERROR(EAGAIN);

> > > > +    }

> > > > +    if (ret < 0)

> > > > +        return ff_qsv_print_error(avctx, ret,

> > > > +                "Error decoding stream header");

> > > >

> > > >      return 0;

> > > >  }

> > > > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext

> > > > *avctx,

> > > > QSVContext *q,

> > > >      uint8_t *dummy_data;

> > > >      int dummy_size;

> > > >      int ret;

> > > > -    const AVPixFmtDescriptor *desc;

> > > > +    mfxVideoParam param = { 0 };

> > > > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > >

> > >

> > > +                                       AV_PIX_FMT_NV

> > > 12,

> > > >

> > >

> > > +                                       AV_PIX_FMT_N

> > > ONE };

> > > >

> > > >      if (!q->avctx_internal) {

> > > >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> > >

> > > -508,7

> > > > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > > > QSVContext

> > > > *q,

> > > >              return AVERROR(ENOMEM);

> > > >

> > > >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > > > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> > > >      }

> > > >

> > > >      if (!pkt->size)

> > > > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> > >

> > > *avctx,

> > > > QSVContext *q,

> > > >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> > > >                       pkt->pos);

> > > >

> > > > -    avctx->field_order  = q->parser->field_order;

> > > >      /* TODO: flush delayed frames on reinit */

> > > > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > > > -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> > > > > coded_width, 16) ||

> > > >

> > > > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > > > > coded_height, 16)) {

> > > >

> > > > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > >

> > >

> > > -                                           AV_PIX_F MT_NONE,

> > > >

> > >

> > > -                                           AV_PIX_F MT_NONE };

> > > > -        enum AVPixelFormat qsv_format;

> > > > +

> > > > +    // sw_pix_fmt was initialized as NV12, will be updated after

> > > > header decoded if not true.

> > > > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > > > +        pix_fmts[1] = q->orig_pix_fmt;

> > >

> > > Well, it still seems non self-explanatory to me why we have

> > > pix_fmts[1] here.

> >

> > I think it has been explained and descripted in my comment. It means

> > sw_pix_fmt.

> > For more detail, probably you want to check the implementation of

> > ff_get_format().

> >

> > > > +

> > > > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> > >

> > > Parsing can be expensive to perform on each frame. See more below.

> >

> > 1. Parsing is just to reading header bits, I don't think it is CPU

> > heavy calculation task, compared with slice level decoding.


Slice level decoding (MB/CTU) is performed on the GPU, so it is not quite correct to compare with it.
Anyway, it is double work - 1st time is DecodeHeader and then DecodeFrameAsync will do the same
(NAL splitting, headers' parsing). In case of big NALs (4K AVC intra slices as e.g.) this might be a CPU burden.

> > 2. Mostly only key frames have completed header. For non-key frame,

> > just return MFX_ERR_MORE_DATA instead of real parsing work.

> >

> > > > +

> > > > +    if (ret >= 0 && (q->orig_pix_fmt !=

> > > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> > > > +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> > > > +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

> > > >          AVPacket zero_pkt = {0};

> > >

> > > You are actually trying to perform the task which is being done by

> > > mediasdk.

> > > Moreover, you don't detect all the cases when mediasdk decoder will

> > > require to be reset. For example, you may have 2 streams

> > > concatenated which have perfectly the same resolution, but still

> > > decoder reset will be required because of profile/level/gop

> > > structure changes and memory reallocation or better to say

> > > additional allocation could also be required because mediasdk may

> > > require different (bigger or lesser) number of surfaces to properly

> > > handle new bitstream. See more below.

> >

> > Which case of gop structure changing need to reset? Could you please,

> > give an example.

> 

> Dmitry Gurulev, can you, please, help answer these questions?

 
I believe Dmitry R. means a change of the DPB size due to a change of level or of SPS :: vui_parameters :: max_dec_frame_buffering (AVC).
A change of profile may definitely demand re-initialization, e.g. main (8b 420) -> rext (8b 420).

> >

> > > >

> > > >          if (q->buffered_count) {

> > > > @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext

> > >

> > > *avctx,

> > > > QSVContext *q,

> > > >              q->buffered_count--;

> > > >              return qsv_decode(avctx, q, frame, got_frame,

> > >

> > > &zero_pkt);

> > > >          }

> > > > -

> > > >          q->reinit_flag = 0;

> > > >

> > > > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> > > > > fourcc);

> > > >

> > > > -        if (qsv_format < 0) {

> > > > -            av_log(avctx, AV_LOG_ERROR,

> > > > -                   "Decoding pixel format '%s' is not

> > >

> > > supported\n",

> > > > -                   av_get_pix_fmt_name(q->parser->format));

> > > > -            ret = AVERROR(ENOSYS);

> > > > -            goto reinit_fail;

> > > > -        }

> > > > +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] =

> > > > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> > > >

> > > > -        q->orig_pix_fmt     = q->parser->format;

> > > > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > > > -        avctx->width        = q->parser->width;

> > > > -        avctx->height       = q->parser->height;

> > > > -        avctx->coded_width  = FFALIGN(q->parser->coded_width,

> > >

> > > 16);

> > > > -        avctx->coded_height = FFALIGN(q->parser->coded_height,

> > >

> > > 16);

> > > > -        avctx->level        = q->avctx_internal->level;

> > > > -        avctx->profile      = q->avctx_internal->profile;

> > > > +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> > > > +        avctx->coded_height = param.mfx.FrameInfo.Height;

> > > >

> > > > -        ret = ff_get_format(avctx, pix_fmts);

> > > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

> > > >          if (ret < 0)

> > > >              goto reinit_fail;

> > > > +        q->initialized = 0;

> > > > +    }

> > > >

> > > > -        avctx->pix_fmt = ret;

> > > > -

> > > > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > > > -        if (!desc)

> > > > -            goto reinit_fail;

> > > > -

> > > > -         if (desc->comp[0].depth > 8) {

> > > > -            avctx->coded_width

> > >

> > > =  FFALIGN(q->parser->coded_width,

> > > > 32);

> > > > -            avctx->coded_height =

> > >

> > > FFALIGN(q->parser->coded_height,

> > > > 32);

> > > > -        }

> > > > -

> > > > -        ret = qsv_decode_init(avctx, q);

> > > > +    if (!q->initialized) {

> > > > +        ret = qsv_decode_init(avctx, q, &param);

> > > >          if (ret < 0)

> > > >              goto reinit_fail;

> > > > +        q->initialized = 1;

> > > >      }

> > > >

> > > >      return qsv_decode(avctx, q, frame, got_frame, pkt);

> > >

> > > So, from my perspective you should not attempt to call decode header

> > > on each incoming data portion and try to interpret results. Instead

> > > you should respect what mediasdk DecodeFrameAsync is returning.

> > > Basically it will return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM is decoder

> > > reset is required.

> > > This gives you 2 situations when you should call

> > > DecodeHeader:

> > > 1. You either call it if decoder is not initalized at all 2. Or you

> > > call it upon receiving MFX_ERR_INCOMPATIBLE_VIDEO_PARAM

> > >

> > > Once you received MFX_ERR_INCOMPATIBLE_VIDEO_PARAM step to

> perform

> > > are:

> > > 1. Flush cached frames from medisdk decoder passing nullptr

> > > bitstream to DecodeFrameAsync 2. Run decode header and get updated

> > > parameters 3.

> > > Check (via QueryIOSurf) and potentially reallocate surfaces to

> > > satisfy new bitstream requirement 4. Reset decoder with new

> > > parameters

> >

> > 1. This patch is just to replace original parser to MSDK parser. I

> > don't like to mix other thing here (thus probably introduces new bugs,

> > but full round testing has been done for this patch).

> >   If it is really necessary, should be done by an additional patch. 2.

> > Unfortunately, if we don't reset and set correct resolution/pix_fmt,

> > FFmpeg-qsv already failed in the progress of MSDK initialization (e,g:

> > pass a NV12 format but actually it is P010. see

> > : https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsv.c#L424

> > )

> >   There is no opportunity to get the status of DecodeFrameAsync at

> > these case.


Could you please give me more details about that case?

> >I haven't compared MSDK sample decoder with current FFmpeg

> > (Will take a look later). But IIRC, MSDK sample decoder isn't doing a

> > better job than ffmpeg-qsv for reinit case.

> >   (one example, it is hang when run sample_decode h264 -i /fate-

> > suite/h264/reinit-large_420_8-to-small_420_8.h264 -r , and this clip

> > is changing from a larger resolution to a smaller one. I wonder to

> > know what will happen if change resolution from smaller to a larger,

> > eg, 720x480 to 1920x1080).


DecodeFrameAsync should return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM.
Where may I find the '/fate-suite/h264/reinit-large_420_8-to-small_420_8.h264' clip to
check what is going on with sample_decode?

> > 3. IMHO, reinit work is better to do before decoding, instead of

> > decoding it and recover it after it is failed.


Actually, if something changes (a new SPS), no decoding is performed; DecodeFrameAsync just returns
MFX_ERR_INCOMPATIBLE_VIDEO_PARAM, so there is nothing to recover (roll back).

> > 4. This a common function for all decoders, I would like to see it is

> > under control of ffmpeg. MSDK may have no unified implementation of

> > all codecs ( e.g: https://github.com/Intel-Media-SDK/MediaSDK/issues/

> > 333 ).

> >   Your suggestion may be addressed only when I see any new issues or

> > bugs. But currently I can't see the necessity.

> >

> > > Application may attempt to optimize procedure on step 3 above and

> > > avoid surfaces reallocation if possible. For example, if you have

> > > allocated

> > > 1920x1080 surfaces and resolution changed to 720x480, you may

> > > actually stay on 1920x1080 if you are ok to use more memory hoping

> > > to receive

> > > 1920x1080 stream in the future again. Eventually this would work if

> > > you have enough number of 1920x1080 surfaces.

> >

> > Hm, this is a simpler topic, https://patchwork.ffmpeg.org/patch/12257

> > / is to prepare this though decoder should be updated too. But as

> > previous discussion, thus should be a separated patch and need pre-

> > testing.


--------------------------------------------------------------------
Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park,
17 Krylatskaya Str., Bldg 4, Moscow 121614,
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Gurulev, Dmitry March 18, 2019, 6:50 a.m. UTC | #14
> -----Original Message-----

> From: Rogozhkin, Dmitry V

> Sent: Saturday, March 16, 2019 4:17 AM

> To: Li, Zhong <zhong.li@intel.com>; ffmpeg-devel@ffmpeg.org; Gurulev, Dmitry

> <dmitry.gurulev@intel.com>

> Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace current parser

> with MFXVideoDECODE_DecodeHeader()

> 

> On Thu, 2019-03-14 at 19:03 +0800, Li, Zhong wrote:

> > > From: Rogozhkin, Dmitry V

> > > Sent: Tuesday, March 12, 2019 7:37 AM

> > > To: Li, Zhong <zhong.li@intel.com>; ffmpeg-devel@ffmpeg.org

> > > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > > current parser with MFXVideoDECODE_DecodeHeader()

> > >

> > > On Mon, 2019-03-11 at 17:23 +0800, Li, Zhong wrote:

> > > > > -----Original Message-----

> > > > > From: Rogozhkin, Dmitry V

> > > > > Sent: Saturday, March 9, 2019 8:48 AM

> > > > > To: ffmpeg-devel@ffmpeg.org

> > > > > Cc: Li, Zhong <zhong.li@intel.com>

> > > > > Subject: Re: [FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace

> > > > > current parser with MFXVideoDECODE_DecodeHeader()

> > > > >

> > > > > On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:

> > > > > > Using MSDK parser can improve qsv decoder pass rate in some

> > > > > > cases

> > > > > > (E.g:

> > > > > > sps declares a wrong level_idc, smaller than it should be).

> > > > > > And it is necessary for adding new qsv decoders such as MJPEG

> > > > > > and

> > > > > > VP9

> > > > > > since current parser can't provide enough information.

> > > > > > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > > > > > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.ht

> > > > > > ml

> > > > > > and

> > > > > > merged as commit 1acb19d, but was overwritten when merged

> > > > > > libav patches (commit: 1f26a23) without any explain.

> > > > > >

> > > > > > v2: split decode header from decode_init, and call it for

> > > > > > everyframe to detect format/resoultion change. It can fix some

> > > > > > regression issues such as hevc 10bits decoding.

> > > > > >

> > > > > > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > > > > > ---

> > > > > >  libavcodec/qsvdec.c       | 172

> > >

> > > ++++++++++++++++++++----------

> > > > > > ----

> > > > > > ----

> > > > > >  libavcodec/qsvdec.h       |   2 +

> > > > > >  libavcodec/qsvdec_h2645.c |   1 +

> > > > > >  libavcodec/qsvdec_other.c |   1 +

> > > > > >  4 files changed, 93 insertions(+), 83 deletions(-)

> > > > > >

> > > > > > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > > > > > 4a0be811fb..b78026e14d 100644

> > > > > > --- a/libavcodec/qsvdec.c

> > > > > > +++ b/libavcodec/qsvdec.c

> > > > > > @@ -120,19 +120,17 @@ static inline unsigned int

> > > > > > qsv_fifo_size(const

> > > > > > AVFifoBuffer* fifo)

> > > > > >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> > > > > >  }

> > > > > >

> > > > > > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext

> > > > > > *q)

> > > > > > +static int qsv_decode_preinit(AVCodecContext *avctx,

> > > > > > QSVContext

> > > > > > *q,

> > > > > > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > > > > >  {

> > > > > > -    const AVPixFmtDescriptor *desc;

> > > > > >      mfxSession session = NULL;

> > > > > >      int iopattern = 0;

> > > > > > -    mfxVideoParam param = { 0 };

> > > > > > -    int frame_width  = avctx->coded_width;

> > > > > > -    int frame_height = avctx->coded_height;

> > > > > >      int ret;

> > > > > >

> > > > > > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > > > > > -    if (!desc)

> > > > > > -        return AVERROR_BUG;

> > > > > > +    ret = ff_get_format(avctx, pix_fmts);

> > > > > > +    if (ret < 0) {

> > > > > > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > > > > > +        return ret;

> > > > > > +    }

> > > > > >

> > > > > >      if (!q->async_fifo) {

> > > > > >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > > > > > qsv_fifo_item_size()); @@ -170,48 +168,72 @@ static int

> > > > > > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > > > > >          return ret;

> > > > > >      }

> > > > > >

> > > > > > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > > > > -    if (ret < 0)

> > > > > > -        return ret;

> > > > > > +    param->IOPattern   = q->iopattern;

> > > > > > +    param->AsyncDepth  = q->async_depth;

> > > > > > +    param->ExtParam    = q->ext_buffers;

> > > > > > +    param->NumExtParam = q->nb_ext_buffers;

> > > > > >

> > > > > > -    param.mfx.CodecId      = ret;

> > > > > > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > > > > > > codec_id,

> > > > > >

> > > > > > avctx->profile);

> > > > > > -    param.mfx.CodecLevel   = avctx->level ==

> > > > >

> > > > > FF_LEVEL_UNKNOWN ?

> > > > > > MFX_LEVEL_UNKNOWN : avctx->level;

> > > > > > -

> > > > > > -    param.mfx.FrameInfo.BitDepthLuma   =

> > >

> > > desc->comp[0].depth;

> > > > > > -    param.mfx.FrameInfo.BitDepthChroma = desc-

> > > > > > >comp[0].depth;

> > > > > > -    param.mfx.FrameInfo.Shift          =

> > >

> > > desc->comp[0].depth >

> > > > >

> > > > > 8;

> > > > > > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > > > > > -    param.mfx.FrameInfo.Width          = frame_width;

> > > > > > -    param.mfx.FrameInfo.Height         = frame_height;

> > > > > > -    param.mfx.FrameInfo.ChromaFormat   =

> > > > >

> > > > > MFX_CHROMAFORMAT_YUV420;

> > > > > > -

> > > > > > -    switch (avctx->field_order) {

> > > > > > -    case AV_FIELD_PROGRESSIVE:

> > > > > > -        param.mfx.FrameInfo.PicStruct =

> > > > >

> > > > > MFX_PICSTRUCT_PROGRESSIVE;

> > > > > > -        break;

> > > > > > -    case AV_FIELD_TT:

> > > > > > -        param.mfx.FrameInfo.PicStruct =

> > > > >

> > > > > MFX_PICSTRUCT_FIELD_TFF;

> > > > > > -        break;

> > > > > > -    case AV_FIELD_BB:

> > > > > > -        param.mfx.FrameInfo.PicStruct =

> > > > >

> > > > > MFX_PICSTRUCT_FIELD_BFF;

> > > > > > -        break;

> > > > > > -    default:

> > > > > > -        param.mfx.FrameInfo.PicStruct =

> > > > >

> > > > > MFX_PICSTRUCT_UNKNOWN;

> > > > > > -        break;

> > > > > > -    }

> > > > > > +    return 0;

> > > > > > + }

> > > > > >

> > > > > > -    param.IOPattern   = q->iopattern;

> > > > > > -    param.AsyncDepth  = q->async_depth;

> > > > > > -    param.ExtParam    = q->ext_buffers;

> > > > > > -    param.NumExtParam = q->nb_ext_buffers;

> > > > > > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext

> > > > > > *q,

> > > > > > mfxVideoParam *param)

> > > > > > +{

> > > > > > +    int ret;

> > > > > >

> > > > > > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > > > > > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > > > > > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > > > > > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > > > > > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > > > > > +    avctx->level        = param->mfx.CodecLevel;

> > > > > > +    avctx->profile      = param->mfx.CodecProfile;

> > > > > > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > > > > > > mfx.FrameInfo.PicStruct);

> > > > > >

> > > > > > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > > > > > > mfx.FrameInfo.FourCC);

> > > > > >

> > > > > > +

> > > > > > +    ret = MFXVideoDECODE_Init(q->session, param);

> > > > > >      if (ret < 0)

> > > > > >          return ff_qsv_print_error(avctx, ret,

> > > > > >                                    "Error initializing

> > >

> > > the

> > > > >

> > > > > MFX video

> > > > > > decoder");

> > > > > >

> > > > > > -    q->frame_info = param.mfx.FrameInfo;

> > > > > > +    q->frame_info = param->mfx.FrameInfo;

> > > > > > +

> > > > > > +    return 0;

> > > > > > +}

> > > > > > +

> > > > > > +static int qsv_decode_header(AVCodecContext *avctx,

> > > > > > QSVContext

> > > > > > *q,

> > > > > > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam

> > > > > > *param)

> > > > > > +{

> > > > > > +    int ret;

> > > > > > +

> > > > > > +    mfxBitstream bs = { { { 0 } } };

> > > > > > +

> > > > > > +    if (avpkt->size) {

> > > > > > +        bs.Data       = avpkt->data;

> > > > > > +        bs.DataLength = avpkt->size;

> > > > > > +        bs.MaxLength  = bs.DataLength;

> > > > > > +        bs.TimeStamp  = avpkt->pts;

> > > > > > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > > > > > +            bs.DataFlag   |=

> > > > >

> > > > > MFX_BITSTREAM_COMPLETE_FRAME;

> > > > > > +    } else

> > > > > > +        return AVERROR_INVALIDDATA;

> > > > > > +

> > > > > > +

> > > > > > +    if(!q->session) {

> > > > > > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > > > > > +        if (ret < 0)

> > > > > > +            return ret;

> > > > > > +    }

> > > > > > +

> > > > > > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > > > > > +    if (ret < 0)

> > > > > > +        return ret;

> > > > > > +

> > > > > > +    param->mfx.CodecId = ret;

> > > > > > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs,

> > >

> > > param);

> > > > > > +    if (MFX_ERR_MORE_DATA == ret) {

> > > > > > +       return AVERROR(EAGAIN);

> > > > > > +    }

> > > > > > +    if (ret < 0)

> > > > > > +        return ff_qsv_print_error(avctx, ret,

> > > > > > +                "Error decoding stream header");

> > > > > >

> > > > > >      return 0;

> > > > > >  }

> > > > > > @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext

> > > > > > *avctx,

> > > > > > QSVContext *q,

> > > > > >      uint8_t *dummy_data;

> > > > > >      int dummy_size;

> > > > > >      int ret;

> > > > > > -    const AVPixFmtDescriptor *desc;

> > > > > > +    mfxVideoParam param = { 0 };

> > > > > > +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > > > >

> > > > >

> > > > >

> > >

> > > > > > +                                       AV_PIX_FMT_NV12,

> > > > > > +                                       AV_PIX_FMT_NONE };

> > > > > >

> > > > > >      if (!q->avctx_internal) {

> > > > > >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> > > > >

> > > > > -508,7

> > > > > > +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > > > > > QSVContext

> > > > > > *q,

> > > > > >              return AVERROR(ENOMEM);

> > > > > >

> > > > > >          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;

> > > > > > -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;

> > > > > >      }

> > > > > >

> > > > > >      if (!pkt->size)

> > > > > > @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext

> > > > >

> > > > > *avctx,

> > > > > > QSVContext *q,

> > > > > >                       pkt->data, pkt->size, pkt->pts,

> > >

> > > pkt->dts,

> > > > > >                       pkt->pos);

> > > > > >

> > > > > > -    avctx->field_order  = q->parser->field_order;

> > > > > >      /* TODO: flush delayed frames on reinit */

> > > > > > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > > > > > -        FFALIGN(q->parser->coded_width, 16)  !=

> > >

> > > FFALIGN(avctx-

> > > > > > > coded_width, 16) ||

> > > > > >

> > > > > > -        FFALIGN(q->parser->coded_height, 16) !=

> > > > > > FFALIGN(avctx-

> > > > > > > coded_height, 16)) {

> > > > > >

> > > > > > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > > > > >

> > > > >

> > > > >

> > >

> > > -                                           AV_PIX_FMT_NONE,

> > > > > >

> > > > >

> > > > >

> > >

> > > -                                           AV_PIX_FMT_NONE };

> > > > > > -        enum AVPixelFormat qsv_format;

> > > > > > +

> > > > > > +    // sw_pix_fmt was initialized as NV12, will be updated

> > > > > > after

> > > > > > header decoded if not true.

> > > > > > +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)

> > > > > > +        pix_fmts[1] = q->orig_pix_fmt;

> > > > >

> > > > > Well, it still seems non self-explanatory to me why we have

> > > > > pix_fmts[1] here.

> > > >

> > > > I think it has been explained and described in my comment. It

> > > > means sw_pix_fmt.

> > > > For more detail, probably you want to check the implementation of

> > > > ff_get_format().

> > > >

> > > > > > +

> > > > > > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts,

> > > > > > &param);

> > > > >

> > > > > Parsing can be expensive to perform on each frame. See more

> > > > > below.

> > > >

> > > > 1. Parsing just reads header bits; I don't think it is a CPU-

> > > > heavy calculation task compared with slice-level decoding.

> > > > 2. Mostly only key frames have a complete header. For a non-key

> > > > frame, it just returns MFX_ERR_MORE_DATA instead of doing real parsing work.

> > >

> > >

> > > That was long when I looked myself in details of DecodeHeader, but I

> > > think that what the following will happen if frame is missing a

> > > header and you feed it into mediasdk: mediasdk will read the whole

> > > bitstream to the end trying to find a header. Once it will fail to

> > > find it mediasdk will return MFX_ERR_MORE_DATA. I think you will be

> > > able to see that if you will print mfxBitstream::DataOffset and

> > > mfxBitstream::DataLength before and after your call for

> > > MFXVideoDECODE_DecodeHeader(). So, mediasdk actually changes

> > > DataOffset either pointing to the beginning of the header or right

> > > after the header (I don't remember exactly, sorry). If header not

> > > found  it sets DataOffset=DataLength and DataLength=0 requesting new

> > > bitstream portion. The reason why you think everything is ok is that

> > > you construct mfxBitstream structure anew each time in

> > > qsv_decode_header.

> > > Hence, you just don't notice that mediasdk performs a massive

> > > parsing.

> > >

> > > I'm afraid that, as is, your change will result in massive CPU%

> > > regressions.

> >

> > That is a good point, if massive CPU% regressions really happens,

> > definitely it should be taken seriously.

> > Then I compared the performance and CPU usage with and without this

> > patch. I can't find obvious performance drop or higher CPU usage, at

> > least for h264.

> > And I take a little deep to check how MSDK search start code: https:/

> > /github.com/Intel-Media-

> >

> SDK/MediaSDK/blob/master/_studio/shared/umc/codec/h264_dec/src/umc_h2

> > 64_nal_spl.cpp#L319

> > If MSDK can find a start code, no matter whether it is an SPS/PPS or a slice

> > header start code, it will end the search.

> > Yes, mostly for non-key frames, there is no SPS/PPS. But a slice header

> > still exists, which means MSDK doesn't need to search the whole packet.


But DecodeHeader tries to find SPS/PPS so it will process the whole input.

> > For case of AnnexC, ffmpeg-qsv will convert it to be AnnexB: https://

> > github.com/FFmpeg/FFmpeg/blob/master/libavcodec/qsvdec_h2645.c#L257,

> > that can make sure start code is always existed.

> >

> > If you agree with the above, I would like to keep the current implementation and

> > push this patch; otherwise it will block the MJPEG/VP9 decoding patches.

> >

> >

> 

> Dmitry Gurulev, can you, please, help answer these questions?


--------------------------------------------------------------------
Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park,
17 Krylatskaya Str., Bldg 4, Moscow 121614,
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
diff mbox

Patch

diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index 4a0be811fb..b78026e14d 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -120,19 +120,17 @@  static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
     return av_fifo_size(fifo) / qsv_fifo_item_size();
 }
 
-static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
+static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
 {
-    const AVPixFmtDescriptor *desc;
     mfxSession session = NULL;
     int iopattern = 0;
-    mfxVideoParam param = { 0 };
-    int frame_width  = avctx->coded_width;
-    int frame_height = avctx->coded_height;
     int ret;
 
-    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
-    if (!desc)
-        return AVERROR_BUG;
+    ret = ff_get_format(avctx, pix_fmts);
+    if (ret < 0) {
+        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;
+        return ret;
+    }
 
     if (!q->async_fifo) {
         q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
@@ -170,48 +168,72 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
         return ret;
     }
 
-    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
-    if (ret < 0)
-        return ret;
+    param->IOPattern   = q->iopattern;
+    param->AsyncDepth  = q->async_depth;
+    param->ExtParam    = q->ext_buffers;
+    param->NumExtParam = q->nb_ext_buffers;
 
-    param.mfx.CodecId      = ret;
-    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id, avctx->profile);
-    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ? MFX_LEVEL_UNKNOWN : avctx->level;
-
-    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;
-    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;
-    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;
-    param.mfx.FrameInfo.FourCC         = q->fourcc;
-    param.mfx.FrameInfo.Width          = frame_width;
-    param.mfx.FrameInfo.Height         = frame_height;
-    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
-
-    switch (avctx->field_order) {
-    case AV_FIELD_PROGRESSIVE:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
-        break;
-    case AV_FIELD_TT:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;
-        break;
-    case AV_FIELD_BB:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;
-        break;
-    default:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;
-        break;
-    }
+    return 0;
+ }
 
-    param.IOPattern   = q->iopattern;
-    param.AsyncDepth  = q->async_depth;
-    param.ExtParam    = q->ext_buffers;
-    param.NumExtParam = q->nb_ext_buffers;
+static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, mfxVideoParam *param)
+{
+    int ret;
 
-    ret = MFXVideoDECODE_Init(q->session, &param);
+    avctx->width        = param->mfx.FrameInfo.CropW;
+    avctx->height       = param->mfx.FrameInfo.CropH;
+    avctx->coded_width  = param->mfx.FrameInfo.Width;
+    avctx->coded_height = param->mfx.FrameInfo.Height;
+    avctx->level        = param->mfx.CodecLevel;
+    avctx->profile      = param->mfx.CodecProfile;
+    avctx->field_order  = ff_qsv_map_picstruct(param->mfx.FrameInfo.PicStruct);
+    avctx->pix_fmt      = ff_qsv_map_fourcc(param->mfx.FrameInfo.FourCC);
+
+    ret = MFXVideoDECODE_Init(q->session, param);
     if (ret < 0)
         return ff_qsv_print_error(avctx, ret,
                                   "Error initializing the MFX video decoder");
 
-    q->frame_info = param.mfx.FrameInfo;
+    q->frame_info = param->mfx.FrameInfo;
+
+    return 0;
+}
+
+static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
+{
+    int ret;
+
+    mfxBitstream bs = { { { 0 } } };
+
+    if (avpkt->size) {
+        bs.Data       = avpkt->data;
+        bs.DataLength = avpkt->size;
+        bs.MaxLength  = bs.DataLength;
+        bs.TimeStamp  = avpkt->pts;
+        if (avctx->field_order == AV_FIELD_PROGRESSIVE)
+            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;
+    } else
+        return AVERROR_INVALIDDATA;
+
+
+    if(!q->session) {
+        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);
+        if (ret < 0)
+            return ret;
+    }
+
+    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
+    if (ret < 0)
+        return ret;
+
+    param->mfx.CodecId = ret;
+    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);
+    if (MFX_ERR_MORE_DATA == ret) {
+       return AVERROR(EAGAIN);
+    }
+    if (ret < 0)
+        return ff_qsv_print_error(avctx, ret,
+                "Error decoding stream header");
 
     return 0;
 }
@@ -494,7 +516,10 @@  int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
     uint8_t *dummy_data;
     int dummy_size;
     int ret;
-    const AVPixFmtDescriptor *desc;
+    mfxVideoParam param = { 0 };
+    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
+                                       AV_PIX_FMT_NV12,
+                                       AV_PIX_FMT_NONE };
 
     if (!q->avctx_internal) {
         q->avctx_internal = avcodec_alloc_context3(NULL);
@@ -508,7 +533,6 @@  int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
             return AVERROR(ENOMEM);
 
         q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
-        q->orig_pix_fmt   = AV_PIX_FMT_NONE;
     }
 
     if (!pkt->size)
@@ -521,15 +545,17 @@  int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
                      pkt->data, pkt->size, pkt->pts, pkt->dts,
                      pkt->pos);
 
-    avctx->field_order  = q->parser->field_order;
     /* TODO: flush delayed frames on reinit */
-    if (q->parser->format       != q->orig_pix_fmt    ||
-        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx->coded_width, 16) ||
-        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx->coded_height, 16)) {
-        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
-                                           AV_PIX_FMT_NONE,
-                                           AV_PIX_FMT_NONE };
-        enum AVPixelFormat qsv_format;
+
+    // sw_pix_fmt was initialized as NV12, will be updated after header decoded if not true.
+    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)
+        pix_fmts[1] = q->orig_pix_fmt;
+
+    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);
+
+    if (ret >= 0 && (q->orig_pix_fmt != ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||
+        avctx->coded_width  != param.mfx.FrameInfo.Width ||
+        avctx->coded_height != param.mfx.FrameInfo.Height)) {
         AVPacket zero_pkt = {0};
 
         if (q->buffered_count) {
@@ -538,45 +564,24 @@  int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
             q->buffered_count--;
             return qsv_decode(avctx, q, frame, got_frame, &zero_pkt);
         }
-
         q->reinit_flag = 0;
 
-        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q->fourcc);
-        if (qsv_format < 0) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Decoding pixel format '%s' is not supported\n",
-                   av_get_pix_fmt_name(q->parser->format));
-            ret = AVERROR(ENOSYS);
-            goto reinit_fail;
-        }
+        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] = ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);
 
-        q->orig_pix_fmt     = q->parser->format;
-        avctx->pix_fmt      = pix_fmts[1] = qsv_format;
-        avctx->width        = q->parser->width;
-        avctx->height       = q->parser->height;
-        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);
-        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);
-        avctx->level        = q->avctx_internal->level;
-        avctx->profile      = q->avctx_internal->profile;
+        avctx->coded_width  = param.mfx.FrameInfo.Width;
+        avctx->coded_height = param.mfx.FrameInfo.Height;
 
-        ret = ff_get_format(avctx, pix_fmts);
+        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);
         if (ret < 0)
             goto reinit_fail;
+        q->initialized = 0;
+    }
 
-        avctx->pix_fmt = ret;
-
-        desc = av_pix_fmt_desc_get(avctx->pix_fmt);
-        if (!desc)
-            goto reinit_fail;
-
-         if (desc->comp[0].depth > 8) {
-            avctx->coded_width =  FFALIGN(q->parser->coded_width, 32);
-            avctx->coded_height = FFALIGN(q->parser->coded_height, 32);
-        }
-
-        ret = qsv_decode_init(avctx, q);
+    if (!q->initialized) {
+        ret = qsv_decode_init(avctx, q, &param);
         if (ret < 0)
             goto reinit_fail;
+        q->initialized = 1;
     }
 
     return qsv_decode(avctx, q, frame, got_frame, pkt);
@@ -589,4 +594,5 @@  reinit_fail:
 void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)
 {
     q->orig_pix_fmt = AV_PIX_FMT_NONE;
+    q->initialized = 0;
 }
diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
index 111536caba..4812fb2a6b 100644
--- a/libavcodec/qsvdec.h
+++ b/libavcodec/qsvdec.h
@@ -63,6 +63,8 @@  typedef struct QSVContext {
     uint32_t fourcc;
     mfxFrameInfo frame_info;
 
+    int initialized;
+
     // options set by the caller
     int async_depth;
     int iopattern;
diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
index 9b49f5506e..eb1dc336a4 100644
--- a/libavcodec/qsvdec_h2645.c
+++ b/libavcodec/qsvdec_h2645.c
@@ -103,6 +103,7 @@  static av_cold int qsv_decode_init(AVCodecContext *avctx)
         }
     }
 
+    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;
     s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));
     if (!s->packet_fifo) {
         ret = AVERROR(ENOMEM);
diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
index 03251d2c85..a6f1b88ca0 100644
--- a/libavcodec/qsvdec_other.c
+++ b/libavcodec/qsvdec_other.c
@@ -90,6 +90,7 @@  static av_cold int qsv_decode_init(AVCodecContext *avctx)
     }
 #endif
 
+    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;
     s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));
     if (!s->packet_fifo) {
         ret = AVERROR(ENOMEM);