diff mbox

[FFmpeg-devel,2/5] lavc/qsvdec: Replace current parser with MFXVideoDECODE_DecodeHeader()

Message ID 1548074517-6578-3-git-send-email-zhong.li@intel.com
State New
Headers show

Commit Message

Zhong Li Jan. 21, 2019, 12:41 p.m. UTC
Using MSDK parser can improve qsv decoder pass rate in some cases (E.g:
sps declares a wrong level_idc, smaller than it should be).
And it is necessary for adding new qsv decoders such as MJPEG and VP9
since current parser can't provide enough information.
Actually using MFXVideoDECODE_DecodeHeader() was disscussed at
https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and merged as commit 1acb19d,
but was overwritten when merged libav patches (commit: 1f26a23) without any explain.

Signed-off-by: Zhong Li <zhong.li@intel.com>
---
 libavcodec/qsvdec.c | 103 ++++++++++++++++------------------------------------
 libavcodec/qsvdec.h |   2 +
 2 files changed, 33 insertions(+), 72 deletions(-)

Comments

Rogozhkin, Dmitry V Jan. 24, 2019, 9:43 p.m. UTC | #1
On Mon, 2019-01-21 at 20:41 +0800, Zhong Li wrote:
> Using MSDK parser can improve qsv decoder pass rate in some cases

> (E.g:

> sps declares a wrong level_idc, smaller than it should be).

> And it is necessary for adding new qsv decoders such as MJPEG and VP9

> since current parser can't provide enough information.

> Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> merged as commit 1acb19d,

> but was overwritten when merged libav patches (commit: 1f26a23)

> without any explain.

> 

> Signed-off-by: Zhong Li <zhong.li@intel.com>

> ---

>  libavcodec/qsvdec.c | 103 ++++++++++++++++------------------------

> ------------

>  libavcodec/qsvdec.h |   2 +

>  2 files changed, 33 insertions(+), 72 deletions(-)

> 

> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c

> index 4a0be81..013400b 100644

> --- a/libavcodec/qsvdec.c

> +++ b/libavcodec/qsvdec.c

> @@ -120,7 +120,7 @@ static inline unsigned int qsv_fifo_size(const

> AVFifoBuffer* fifo)

>      return av_fifo_size(fifo) / qsv_fifo_item_size();

>  }

>  

> -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> AVPacket *avpkt)

>  {

>      const AVPixFmtDescriptor *desc;

>      mfxSession session = NULL;

> @@ -129,6 +129,17 @@ static int qsv_decode_init(AVCodecContext

> *avctx, QSVContext *q)

>      int frame_width  = avctx->coded_width;

>      int frame_height = avctx->coded_height;

>      int ret;

> +    mfxBitstream bs = { { { 0 } } };

> +

> +    if (avpkt->size) {

> +        bs.Data       = avpkt->data;

> +        bs.DataLength = avpkt->size;

> +        bs.MaxLength  = bs.DataLength;

> +        bs.TimeStamp  = avpkt->pts;

> +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;

+    } else
> +        return AVERROR_INVALIDDATA;

>  

>      desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

>      if (!desc)

> @@ -174,32 +185,19 @@ static int qsv_decode_init(AVCodecContext

> *avctx, QSVContext *q)

>      if (ret < 0)

>          return ret;

>  

> -    param.mfx.CodecId      = ret;

> -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id,

> avctx->profile);

> -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ?

> MFX_LEVEL_UNKNOWN : avctx->level;

> -

> -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;

> -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> -    param.mfx.FrameInfo.Width          = frame_width;

> -    param.mfx.FrameInfo.Height         = frame_height;

> -    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;

> -

> -    switch (avctx->field_order) {

> -    case AV_FIELD_PROGRESSIVE:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;

> -        break;

> -    case AV_FIELD_TT:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;

> -        break;

> -    case AV_FIELD_BB:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;

> -        break;

> -    default:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;

> -        break;

> -    }

> +    param.mfx.CodecId = ret;

> +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, &param);

This function may potentially return MFX_ERR_MORE_DATA if provided
bitstream don't contain full header. I am not sure whether ffmpeg will
guarantee that... And the decoding error reported by Artie suggests
that something is wrong around this. That can be ffmpeg or mediasdk
issue - need to check what was the data which ffmpeg really passed to
DecodeHeader.

> +    if (ret < 0)

> +        return ff_qsv_print_error(avctx, ret,

> +                "Error decoding stream header");

> +

> +    avctx->width        = param.mfx.FrameInfo.CropW;

> +    avctx->height       = param.mfx.FrameInfo.CropH;

> +    avctx->coded_width  = param.mfx.FrameInfo.Width;

> +    avctx->coded_height = param.mfx.FrameInfo.Height;

> +    avctx->level        = param.mfx.CodecProfile;

> +    avctx->profile      = param.mfx.CodecLevel;

Typo here. You assign profile to level and level to profile.

> +    avctx->field_order  =

> ff_qsv_map_picstruct(param.mfx.FrameInfo.PicStruct);

>  

>      param.IOPattern   = q->iopattern;

>      param.AsyncDepth  = q->async_depth;

> @@ -521,62 +519,22 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>                       pkt->data, pkt->size, pkt->pts, pkt->dts,

>                       pkt->pos);

>  

> -    avctx->field_order  = q->parser->field_order;

>      /* TODO: flush delayed frames on reinit */

> -    if (q->parser->format       != q->orig_pix_fmt    ||

> -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> >coded_width, 16) ||

> -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> >coded_height, 16)) {

> +

> +    if (!q->initialized){

>          enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> -                                           AV_PIX_FMT_NONE,

> +                                           AV_PIX_FMT_NV12,

>                                             AV_PIX_FMT_NONE };

> -        enum AVPixelFormat qsv_format;

> -        AVPacket zero_pkt = {0};

> -

> -        if (q->buffered_count) {

> -            q->reinit_flag = 1;

> -            /* decode zero-size pkt to flush the buffered pkt before

> reinit */

> -            q->buffered_count--;

> -            return qsv_decode(avctx, q, frame, got_frame,

> &zero_pkt);

> -        }

> -

> -        q->reinit_flag = 0;

> -

> -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> >fourcc);

> -        if (qsv_format < 0) {

> -            av_log(avctx, AV_LOG_ERROR,

> -                   "Decoding pixel format '%s' is not supported\n",

> -                   av_get_pix_fmt_name(q->parser->format));

> -            ret = AVERROR(ENOSYS);

> -            goto reinit_fail;

> -        }

> -

> -        q->orig_pix_fmt     = q->parser->format;

> -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> -        avctx->width        = q->parser->width;

> -        avctx->height       = q->parser->height;

> -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);

> -        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);

> -        avctx->level        = q->avctx_internal->level;

> -        avctx->profile      = q->avctx_internal->profile;

> -

>          ret = ff_get_format(avctx, pix_fmts);

>          if (ret < 0)

>              goto reinit_fail;

>  

>          avctx->pix_fmt = ret;

>  

> -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> -        if (!desc)

> -            goto reinit_fail;

> -

> -         if (desc->comp[0].depth > 8) {

> -            avctx->coded_width =  FFALIGN(q->parser->coded_width,

> 32);

> -            avctx->coded_height = FFALIGN(q->parser->coded_height,

> 32);

> -        }

> -

> -        ret = qsv_decode_init(avctx, q);

> +        ret = qsv_decode_init(avctx, q, pkt);

>          if (ret < 0)

>              goto reinit_fail;

> +        q->initialized = 1;

>      }

>  

>      return qsv_decode(avctx, q, frame, got_frame, pkt);

> @@ -589,4 +547,5 @@ reinit_fail:

>  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)

>  {

>      q->orig_pix_fmt = AV_PIX_FMT_NONE;

> +    q->initialized = 0;

>  }

> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h

> index 111536c..4812fb2 100644

> --- a/libavcodec/qsvdec.h

> +++ b/libavcodec/qsvdec.h

> @@ -63,6 +63,8 @@ typedef struct QSVContext {

>      uint32_t fourcc;

>      mfxFrameInfo frame_info;

>  

> +    int initialized;

> +

>      // options set by the caller

>      int async_depth;

>      int iopattern;
Rogozhkin, Dmitry V Jan. 24, 2019, 9:54 p.m. UTC | #2
On Thu, 2019-01-24 at 21:43 +0000, Rogozhkin, Dmitry V wrote:
> On Mon, 2019-01-21 at 20:41 +0800, Zhong Li wrote:

> > Using MSDK parser can improve qsv decoder pass rate in some cases

> > (E.g:

> > sps declares a wrong level_idc, smaller than it should be).

> > And it is necessary for adding new qsv decoders such as MJPEG and

> > VP9

> > since current parser can't provide enough information.

> > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> > merged as commit 1acb19d,

> > but was overwritten when merged libav patches (commit: 1f26a23)

> > without any explain.

> > 

> > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > ---

> >  libavcodec/qsvdec.c | 103 ++++++++++++++++------------------------

> > ------------

> >  libavcodec/qsvdec.h |   2 +

> >  2 files changed, 33 insertions(+), 72 deletions(-)

> > 

> > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c

> > index 4a0be81..013400b 100644

> > --- a/libavcodec/qsvdec.c

> > +++ b/libavcodec/qsvdec.c

> > @@ -120,7 +120,7 @@ static inline unsigned int qsv_fifo_size(const

> > AVFifoBuffer* fifo)

> >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> >  }

> >  

> > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > AVPacket *avpkt)

> >  {

> >      const AVPixFmtDescriptor *desc;

> >      mfxSession session = NULL;

> > @@ -129,6 +129,17 @@ static int qsv_decode_init(AVCodecContext

> > *avctx, QSVContext *q)

> >      int frame_width  = avctx->coded_width;

> >      int frame_height = avctx->coded_height;

> >      int ret;

> > +    mfxBitstream bs = { { { 0 } } };

> > +

> > +    if (avpkt->size) {

> > +        bs.Data       = avpkt->data;

> > +        bs.DataLength = avpkt->size;

> > +        bs.MaxLength  = bs.DataLength;

> > +        bs.TimeStamp  = avpkt->pts;

> > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;

When this flag is passed mediasdk expect that "the bitstream buffer
contains a complete frame or complementary field pair of data for the
bitstream". So, this flag don't generally speaking relates to field
order type. Instead it notifies mediasdk how much data does bitstream
contains. I am not sure how ffmpeg deals with bitstream and breaks it
into packages. Can someone, please, clarify? I am particularly
interested in how very beginning of the bitsream is handled and how
SPS/PPS is being passed since this may have effect on the DecodeHeader
introduced below.

> 

> +    } else

> > +        return AVERROR_INVALIDDATA;

> >  

> >      desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> >      if (!desc)

> > @@ -174,32 +185,19 @@ static int qsv_decode_init(AVCodecContext

> > *avctx, QSVContext *q)

> >      if (ret < 0)

> >          return ret;

> >  

> > -    param.mfx.CodecId      = ret;

> > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx-

> > >codec_id,

> > avctx->profile);

> > -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ?

> > MFX_LEVEL_UNKNOWN : avctx->level;

> > -

> > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;

> > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > -    param.mfx.FrameInfo.Width          = frame_width;

> > -    param.mfx.FrameInfo.Height         = frame_height;

> > -    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;

> > -

> > -    switch (avctx->field_order) {

> > -    case AV_FIELD_PROGRESSIVE:

> > -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;

> > -        break;

> > -    case AV_FIELD_TT:

> > -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;

> > -        break;

> > -    case AV_FIELD_BB:

> > -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;

> > -        break;

> > -    default:

> > -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;

> > -        break;

> > -    }

> > +    param.mfx.CodecId = ret;

> > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, &param);

> 

> This function may potentially return MFX_ERR_MORE_DATA if provided

> bitstream don't contain full header. I am not sure whether ffmpeg

> will

> guarantee that... And the decoding error reported by Artie suggests

> that something is wrong around this. That can be ffmpeg or mediasdk

> issue - need to check what was the data which ffmpeg really passed to

> DecodeHeader.

> 

> > +    if (ret < 0)

> > +        return ff_qsv_print_error(avctx, ret,

> > +                "Error decoding stream header");

> > +

> > +    avctx->width        = param.mfx.FrameInfo.CropW;

> > +    avctx->height       = param.mfx.FrameInfo.CropH;

> > +    avctx->coded_width  = param.mfx.FrameInfo.Width;

> > +    avctx->coded_height = param.mfx.FrameInfo.Height;

> > +    avctx->level        = param.mfx.CodecProfile;

> > +    avctx->profile      = param.mfx.CodecLevel;

> 

> Typo here. You assign profile to level and level to profile.

> 

> > +    avctx->field_order  =

> > ff_qsv_map_picstruct(param.mfx.FrameInfo.PicStruct);

> >  

> >      param.IOPattern   = q->iopattern;

> >      param.AsyncDepth  = q->async_depth;

> > @@ -521,62 +519,22 @@ int ff_qsv_process_data(AVCodecContext

> > *avctx,

> > QSVContext *q,

> >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> >                       pkt->pos);

> >  

> > -    avctx->field_order  = q->parser->field_order;

> >      /* TODO: flush delayed frames on reinit */

> > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> > > coded_width, 16) ||

> > 

> > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > > coded_height, 16)) {

> > 

> > +

> > +    if (!q->initialized){

> >          enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> > -                                           AV_PIX_FMT_NONE,

> > +                                           AV_PIX_FMT_NV12,

> >                                             AV_PIX_FMT_NONE };

> > -        enum AVPixelFormat qsv_format;

> > -        AVPacket zero_pkt = {0};

> > -

> > -        if (q->buffered_count) {

> > -            q->reinit_flag = 1;

> > -            /* decode zero-size pkt to flush the buffered pkt

> > before

> > reinit */

> > -            q->buffered_count--;

> > -            return qsv_decode(avctx, q, frame, got_frame,

> > &zero_pkt);

> > -        }

> > -

> > -        q->reinit_flag = 0;

> > -

> > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> > > fourcc);

> > 

> > -        if (qsv_format < 0) {

> > -            av_log(avctx, AV_LOG_ERROR,

> > -                   "Decoding pixel format '%s' is not

> > supported\n",

> > -                   av_get_pix_fmt_name(q->parser->format));

> > -            ret = AVERROR(ENOSYS);

> > -            goto reinit_fail;

> > -        }

> > -

> > -        q->orig_pix_fmt     = q->parser->format;

> > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > -        avctx->width        = q->parser->width;

> > -        avctx->height       = q->parser->height;

> > -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);

> > -        avctx->coded_height = FFALIGN(q->parser->coded_height,

> > 16);

> > -        avctx->level        = q->avctx_internal->level;

> > -        avctx->profile      = q->avctx_internal->profile;

> > -

> >          ret = ff_get_format(avctx, pix_fmts);

> >          if (ret < 0)

> >              goto reinit_fail;

> >  

> >          avctx->pix_fmt = ret;

> >  

> > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > -        if (!desc)

> > -            goto reinit_fail;

> > -

> > -         if (desc->comp[0].depth > 8) {

> > -            avctx->coded_width =  FFALIGN(q->parser->coded_width,

> > 32);

> > -            avctx->coded_height = FFALIGN(q->parser->coded_height,

> > 32);

> > -        }

> > -

> > -        ret = qsv_decode_init(avctx, q);

> > +        ret = qsv_decode_init(avctx, q, pkt);

> >          if (ret < 0)

> >              goto reinit_fail;

> > +        q->initialized = 1;

> >      }

> >  

> >      return qsv_decode(avctx, q, frame, got_frame, pkt);

> > @@ -589,4 +547,5 @@ reinit_fail:

> >  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)

> >  {

> >      q->orig_pix_fmt = AV_PIX_FMT_NONE;

> > +    q->initialized = 0;

> >  }

> > diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h

> > index 111536c..4812fb2 100644

> > --- a/libavcodec/qsvdec.h

> > +++ b/libavcodec/qsvdec.h

> > @@ -63,6 +63,8 @@ typedef struct QSVContext {

> >      uint32_t fourcc;

> >      mfxFrameInfo frame_info;

> >  

> > +    int initialized;

> > +

> >      // options set by the caller

> >      int async_depth;

> >      int iopattern;

> 

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Zhong Li Jan. 25, 2019, 10:43 a.m. UTC | #3
> > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, &param);

> This function may potentially return MFX_ERR_MORE_DATA if provided

> bitstream don't contain full header. I am not sure whether ffmpeg will

> guarantee that... And the decoding error reported by Artie suggests that

> something is wrong around this. That can be ffmpeg or mediasdk issue -

> need to check what was the data which ffmpeg really passed to

> DecodeHeader.


MFX_ERROR_MORE_DATA should be handled, let me try to find such a clip and then updated this patch.
"decoding error reported by Artie" is another case I believe. Thus is due to P010 format hasn't been handled well,10bit hevc clip format 
is P010 but now it is treated as NV12 now. 

>          enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> -                                           AV_PIX_FMT_NONE,

> +                                           AV_PIX_FMT_NV12,

>                                             AV_PIX_FMT_NONE };


avctx->pix_fmt should be get from MFXVideoDECODE_DecodeHeader() but it is needed to init MSDK session, and looks like a deadlock, :(
One solution is that we assume avctx->format has been parsed somewhere else (such as avformat_find_stream_info() form libavformt) before start decoding.

> > +    if (ret < 0)

> > +        return ff_qsv_print_error(avctx, ret,

> > +                "Error decoding stream header");

> > +

> > +    avctx->width        = param.mfx.FrameInfo.CropW;

> > +    avctx->height       = param.mfx.FrameInfo.CropH;

> > +    avctx->coded_width  = param.mfx.FrameInfo.Width;

> > +    avctx->coded_height = param.mfx.FrameInfo.Height;

> > +    avctx->level        = param.mfx.CodecProfile;

> > +    avctx->profile      = param.mfx.CodecLevel;

> Typo here. You assign profile to level and level to profile.


Good catch. Will update, : )
Hendrik Leppkes Jan. 25, 2019, 11:09 a.m. UTC | #4
On Fri, Jan 25, 2019 at 11:43 AM Li, Zhong <zhong.li@intel.com> wrote:
> avctx->pix_fmt should be get from MFXVideoDECODE_DecodeHeader() but it is needed to init MSDK session, and looks like a deadlock, :(
> One solution is that we assume avctx->format has been parsed somewhere else (such as avformat_find_stream_info() form libavformt) before start decoding.
>

You cannot rely on that. Especially since noone knows what format the
decoder is going to use. avformat will tell you its YUV420P10, not
P010.
A user who doesn't use avformat at all and only feed avcodec will
likely not set avctx->format at all, since basically nothing else
needs it.

A decoder has to figure out the pixel format by itself.

- Hendrik
diff mbox

Patch

diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index 4a0be81..013400b 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -120,7 +120,7 @@  static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
     return av_fifo_size(fifo) / qsv_fifo_item_size();
 }
 
-static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
+static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt)
 {
     const AVPixFmtDescriptor *desc;
     mfxSession session = NULL;
@@ -129,6 +129,17 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
     int frame_width  = avctx->coded_width;
     int frame_height = avctx->coded_height;
     int ret;
+    mfxBitstream bs = { { { 0 } } };
+
+    if (avpkt->size) {
+        bs.Data       = avpkt->data;
+        bs.DataLength = avpkt->size;
+        bs.MaxLength  = bs.DataLength;
+        bs.TimeStamp  = avpkt->pts;
+        if (avctx->field_order == AV_FIELD_PROGRESSIVE)
+            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;
+    } else
+        return AVERROR_INVALIDDATA;
 
     desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
     if (!desc)
@@ -174,32 +185,19 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
     if (ret < 0)
         return ret;
 
-    param.mfx.CodecId      = ret;
-    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id, avctx->profile);
-    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ? MFX_LEVEL_UNKNOWN : avctx->level;
-
-    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;
-    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;
-    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;
-    param.mfx.FrameInfo.FourCC         = q->fourcc;
-    param.mfx.FrameInfo.Width          = frame_width;
-    param.mfx.FrameInfo.Height         = frame_height;
-    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
-
-    switch (avctx->field_order) {
-    case AV_FIELD_PROGRESSIVE:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
-        break;
-    case AV_FIELD_TT:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;
-        break;
-    case AV_FIELD_BB:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;
-        break;
-    default:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;
-        break;
-    }
+    param.mfx.CodecId = ret;
+    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, &param);
+    if (ret < 0)
+        return ff_qsv_print_error(avctx, ret,
+                "Error decoding stream header");
+
+    avctx->width        = param.mfx.FrameInfo.CropW;
+    avctx->height       = param.mfx.FrameInfo.CropH;
+    avctx->coded_width  = param.mfx.FrameInfo.Width;
+    avctx->coded_height = param.mfx.FrameInfo.Height;
+    avctx->level        = param.mfx.CodecProfile;
+    avctx->profile      = param.mfx.CodecLevel;
+    avctx->field_order  = ff_qsv_map_picstruct(param.mfx.FrameInfo.PicStruct);
 
     param.IOPattern   = q->iopattern;
     param.AsyncDepth  = q->async_depth;
@@ -521,62 +519,22 @@  int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
                      pkt->data, pkt->size, pkt->pts, pkt->dts,
                      pkt->pos);
 
-    avctx->field_order  = q->parser->field_order;
     /* TODO: flush delayed frames on reinit */
-    if (q->parser->format       != q->orig_pix_fmt    ||
-        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx->coded_width, 16) ||
-        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx->coded_height, 16)) {
+
+    if (!q->initialized){
         enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
-                                           AV_PIX_FMT_NONE,
+                                           AV_PIX_FMT_NV12,
                                            AV_PIX_FMT_NONE };
-        enum AVPixelFormat qsv_format;
-        AVPacket zero_pkt = {0};
-
-        if (q->buffered_count) {
-            q->reinit_flag = 1;
-            /* decode zero-size pkt to flush the buffered pkt before reinit */
-            q->buffered_count--;
-            return qsv_decode(avctx, q, frame, got_frame, &zero_pkt);
-        }
-
-        q->reinit_flag = 0;
-
-        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q->fourcc);
-        if (qsv_format < 0) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Decoding pixel format '%s' is not supported\n",
-                   av_get_pix_fmt_name(q->parser->format));
-            ret = AVERROR(ENOSYS);
-            goto reinit_fail;
-        }
-
-        q->orig_pix_fmt     = q->parser->format;
-        avctx->pix_fmt      = pix_fmts[1] = qsv_format;
-        avctx->width        = q->parser->width;
-        avctx->height       = q->parser->height;
-        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);
-        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);
-        avctx->level        = q->avctx_internal->level;
-        avctx->profile      = q->avctx_internal->profile;
-
         ret = ff_get_format(avctx, pix_fmts);
         if (ret < 0)
             goto reinit_fail;
 
         avctx->pix_fmt = ret;
 
-        desc = av_pix_fmt_desc_get(avctx->pix_fmt);
-        if (!desc)
-            goto reinit_fail;
-
-         if (desc->comp[0].depth > 8) {
-            avctx->coded_width =  FFALIGN(q->parser->coded_width, 32);
-            avctx->coded_height = FFALIGN(q->parser->coded_height, 32);
-        }
-
-        ret = qsv_decode_init(avctx, q);
+        ret = qsv_decode_init(avctx, q, pkt);
         if (ret < 0)
             goto reinit_fail;
+        q->initialized = 1;
     }
 
     return qsv_decode(avctx, q, frame, got_frame, pkt);
@@ -589,4 +547,5 @@  reinit_fail:
 void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)
 {
     q->orig_pix_fmt = AV_PIX_FMT_NONE;
+    q->initialized = 0;
 }
diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
index 111536c..4812fb2 100644
--- a/libavcodec/qsvdec.h
+++ b/libavcodec/qsvdec.h
@@ -63,6 +63,8 @@  typedef struct QSVContext {
     uint32_t fourcc;
     mfxFrameInfo frame_info;
 
+    int initialized;
+
     // options set by the caller
     int async_depth;
     int iopattern;