diff mbox

[FFmpeg-devel,v2,3/6] lavc/qsvdec: Replace current parser with MFXVideoDECODE_DecodeHeader()

Message ID 20190220025821.31346-4-zhong.li@intel.com
State Superseded
Headers show

Commit Message

Zhong Li Feb. 20, 2019, 2:58 a.m. UTC
Using MSDK parser can improve qsv decoder pass rate in some cases (E.g:
sps declares a wrong level_idc, smaller than it should be).
And it is necessary for adding new qsv decoders such as MJPEG and VP9
since current parser can't provide enough information.
Actually using MFXVideoDECODE_DecodeHeader() was disscussed at
https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and merged as commit 1acb19d,
but was overwritten when merged libav patches (commit: 1f26a23) without any explain.

v2: split decode header from decode_init, and call it for everyframe to
detect format/resoultion change. It can fix some regression issues such
as hevc 10bits decoding.

Signed-off-by: Zhong Li <zhong.li@intel.com>
---
 libavcodec/qsvdec.c | 172 ++++++++++++++++++++++----------------------
 libavcodec/qsvdec.h |   2 +
 2 files changed, 90 insertions(+), 84 deletions(-)

Comments

Rogozhkin, Dmitry V Feb. 20, 2019, 7:14 p.m. UTC | #1
On Wed, 2019-02-20 at 10:58 +0800, Zhong Li wrote:
> Using MSDK parser can improve qsv decoder pass rate in some cases

> (E.g:

> sps declares a wrong level_idc, smaller than it should be).

> And it is necessary for adding new qsv decoders such as MJPEG and VP9

> since current parser can't provide enough information.

> Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> merged as commit 1acb19d,

> but was overwritten when merged libav patches (commit: 1f26a23)

> without any explain.

> 

> v2: split decode header from decode_init, and call it for everyframe

> to

> detect format/resoultion change. It can fix some regression issues

> such

> as hevc 10bits decoding.

> 

> Signed-off-by: Zhong Li <zhong.li@intel.com>

> ---

>  libavcodec/qsvdec.c | 172 ++++++++++++++++++++++------------------

> ----

>  libavcodec/qsvdec.h |   2 +

>  2 files changed, 90 insertions(+), 84 deletions(-)

> 

> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c

> index 4a0be811fb..efe054f5c5 100644

> --- a/libavcodec/qsvdec.c

> +++ b/libavcodec/qsvdec.c

> @@ -120,19 +120,18 @@ static inline unsigned int qsv_fifo_size(const

> AVFifoBuffer* fifo)

>      return av_fifo_size(fifo) / qsv_fifo_item_size();

>  }

>  

> -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q,

> enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

>  {

> -    const AVPixFmtDescriptor *desc;

>      mfxSession session = NULL;

>      int iopattern = 0;

> -    mfxVideoParam param = { 0 };

> -    int frame_width  = avctx->coded_width;

> -    int frame_height = avctx->coded_height;

>      int ret;

>  

> -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> -    if (!desc)

> -        return AVERROR_BUG;

> +    ret = ff_get_format(avctx, pix_fmts);

> +    if (ret < 0) {

> +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> +        return ret;

> +    } else

> +        q->orig_pix_fmt = pix_fmts[1];


You rely on the way how pix_fmts will be declared in some other place.
This gives issues to understand the code for the beginners and hence
difficulty in code support. Can we somehow show what is actually going
on here? For example, declare pix_fmts something like:
enum qsv_formats {
  QSV,
  QSV_NV12,
  QSV_MAX
}

enum AVPixelFormat pix_fmts[QSV_MAX+1] = {
  [QSV] = AV_PIX_FMT_QSV,
  [QSV_NV12] = AV_PIX_FMT_NV12,
  [QSV_MAX] = AV_PIX_FMT_NONE
}

After that we could address like:
        q->orig_pix_fmt = pix_fmts[QSV_NV12];
both highlighting what we actually want to achieve and making sure that
we will have less mistakes supporting the list of fmts wish we reorder
something.

>  

>      if (!q->async_fifo) {

>          q->async_fifo = av_fifo_alloc(q->async_depth *

> qsv_fifo_item_size());

> @@ -170,48 +169,72 @@ static int qsv_decode_init(AVCodecContext

> *avctx, QSVContext *q)

>          return ret;

>      }

>  

> -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> -    if (ret < 0)

> -        return ret;

> +    param->IOPattern   = q->iopattern;

> +    param->AsyncDepth  = q->async_depth;

> +    param->ExtParam    = q->ext_buffers;

> +    param->NumExtParam = q->nb_ext_buffers;

>  

> -    param.mfx.CodecId      = ret;

> -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id,

> avctx->profile);

> -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ?

> MFX_LEVEL_UNKNOWN : avctx->level;

> -

> -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;

> -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> -    param.mfx.FrameInfo.Width          = frame_width;

> -    param.mfx.FrameInfo.Height         = frame_height;

> -    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;

> -

> -    switch (avctx->field_order) {

> -    case AV_FIELD_PROGRESSIVE:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;

> -        break;

> -    case AV_FIELD_TT:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;

> -        break;

> -    case AV_FIELD_BB:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;

> -        break;

> -    default:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;

> -        break;

> -    }

> +    return 0;

> + }

> +

> +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> mfxVideoParam *param)

> +{

> +    int ret;

>  

> -    param.IOPattern   = q->iopattern;

> -    param.AsyncDepth  = q->async_depth;

> -    param.ExtParam    = q->ext_buffers;

> -    param.NumExtParam = q->nb_ext_buffers;

> +    avctx->width        = param->mfx.FrameInfo.CropW;

> +    avctx->height       = param->mfx.FrameInfo.CropH;

> +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> +    avctx->coded_height = param->mfx.FrameInfo.Height;

> +    avctx->level        = param->mfx.CodecLevel;

> +    avctx->profile      = param->mfx.CodecProfile;

> +    avctx->field_order  = ff_qsv_map_picstruct(param-

> >mfx.FrameInfo.PicStruct);

> +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> >mfx.FrameInfo.FourCC);

>  

> -    ret = MFXVideoDECODE_Init(q->session, &param);

> +    ret = MFXVideoDECODE_Init(q->session, param);

>      if (ret < 0)

>          return ff_qsv_print_error(avctx, ret,

>                                    "Error initializing the MFX video

> decoder");

>  

> -    q->frame_info = param.mfx.FrameInfo;

> +    q->frame_info = param->mfx.FrameInfo;

> +

> +    return 0;

> +}

> +

> +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q,

> AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> +{

> +    int ret;

> +

> +    mfxBitstream bs = { { { 0 } } };

> +

> +    if (avpkt->size) {

> +        bs.Data       = avpkt->data;

> +        bs.DataLength = avpkt->size;

> +        bs.MaxLength  = bs.DataLength;

> +        bs.TimeStamp  = avpkt->pts;

> +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;

> +    } else

> +        return AVERROR_INVALIDDATA;

> +

> +

> +    if(!q->session) {

> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> +        if (ret < 0)

> +            return ret;

> +    }

> +

> +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> +    if (ret < 0)

> +        return ret;

> +

> +    param->mfx.CodecId = ret;

> +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> +    if (MFX_ERR_MORE_DATA == ret) {

> +       return AVERROR(EAGAIN);

> +    }

> +    if (ret < 0)

> +        return ff_qsv_print_error(avctx, ret,

> +                "Error decoding stream header");

>  

>      return 0;

>  }

> @@ -494,7 +517,11 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>      uint8_t *dummy_data;

>      int dummy_size;

>      int ret;

> -    const AVPixFmtDescriptor *desc;

> +    mfxVideoParam param = { 0 };

> +    static enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> +                                              AV_PIX_FMT_NV12,

> +                                              AV_PIX_FMT_NONE };

> +


I am surprised to see 'static'. Why?

>  

>      if (!q->avctx_internal) {

>          q->avctx_internal = avcodec_alloc_context3(NULL);

> @@ -521,15 +548,13 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>                       pkt->data, pkt->size, pkt->pts, pkt->dts,

>                       pkt->pos);

>  

> -    avctx->field_order  = q->parser->field_order;

>      /* TODO: flush delayed frames on reinit */

> -    if (q->parser->format       != q->orig_pix_fmt    ||

> -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> >coded_width, 16) ||

> -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> >coded_height, 16)) {

> -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> -                                           AV_PIX_FMT_NONE,

> -                                           AV_PIX_FMT_NONE };

> -        enum AVPixelFormat qsv_format;

> +

> +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> +

> +    if (ret >= 0 && (q->orig_pix_fmt !=

> ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

>          AVPacket zero_pkt = {0};

>  

>          if (q->buffered_count) {

> @@ -538,45 +563,23 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>              q->buffered_count--;

>              return qsv_decode(avctx, q, frame, got_frame,

> &zero_pkt);

>          }

> -

>          q->reinit_flag = 0;

>  

> -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> >fourcc);

> -        if (qsv_format < 0) {

> -            av_log(avctx, AV_LOG_ERROR,

> -                   "Decoding pixel format '%s' is not supported\n",

> -                   av_get_pix_fmt_name(q->parser->format));

> -            ret = AVERROR(ENOSYS);

> -            goto reinit_fail;

> -        }

> -

> -        q->orig_pix_fmt     = q->parser->format;

> -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> -        avctx->width        = q->parser->width;

> -        avctx->height       = q->parser->height;

> -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);

> -        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);

> -        avctx->level        = q->avctx_internal->level;

> -        avctx->profile      = q->avctx_internal->profile;

> -

> -        ret = ff_get_format(avctx, pix_fmts);

> +        avctx->pix_fmt = pix_fmts[1]  =

> ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);


Here you modify pix_fmts which is declared static. If someone will use
QSV decoder few times in the same application in parallel he might
experience a race condition, isn't it?

> +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> +        avctx->coded_height = param.mfx.FrameInfo.Height;

> +        

> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

>          if (ret < 0)

>              goto reinit_fail;

> +        q->initialized = 0;

> +    }

>  

> -        avctx->pix_fmt = ret;

> -

> -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> -        if (!desc)

> -            goto reinit_fail;

> -

> -         if (desc->comp[0].depth > 8) {

> -            avctx->coded_width =  FFALIGN(q->parser->coded_width,

> 32);

> -            avctx->coded_height = FFALIGN(q->parser->coded_height,

> 32);

> -        }

> -

> -        ret = qsv_decode_init(avctx, q);

> +    if (!q->initialized) {

> +        ret = qsv_decode_init(avctx, q, &param);

>          if (ret < 0)

>              goto reinit_fail;

> +        q->initialized = 1;

>      }

>  

>      return qsv_decode(avctx, q, frame, got_frame, pkt);

> @@ -589,4 +592,5 @@ reinit_fail:

>  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)

>  {

>      q->orig_pix_fmt = AV_PIX_FMT_NONE;

> +    q->initialized = 0;

>  }

> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h

> index 111536caba..4812fb2a6b 100644

> --- a/libavcodec/qsvdec.h

> +++ b/libavcodec/qsvdec.h

> @@ -63,6 +63,8 @@ typedef struct QSVContext {

>      uint32_t fourcc;

>      mfxFrameInfo frame_info;

>  

> +    int initialized;

> +

>      // options set by the caller

>      int async_depth;

>      int iopattern;
Mark Thompson Feb. 20, 2019, 9:31 p.m. UTC | #2
On 20/02/2019 02:58, Zhong Li wrote:
> Using MSDK parser can improve qsv decoder pass rate in some cases (E.g:
> sps declares a wrong level_idc, smaller than it should be).
> And it is necessary for adding new qsv decoders such as MJPEG and VP9
> since current parser can't provide enough information.

Can you explain the problem with level_idc?  Why would the libmfx parser determine a different answer?

Given that you need the current parser anyway (see previous mail), it would likely be more useful to extend it to supply any information which is missing.

> Actually using MFXVideoDECODE_DecodeHeader() was disscussed at
> https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and merged as commit 1acb19d,
> but was overwritten when merged libav patches (commit: 1f26a23) without any explain.

I'm not sure where the explanation for this went; maybe it was only discussed on IRC.

The reason for using the internal parsers is that you need the information before libmfx is initialised at all in the hw_frames_ctx case (i.e. before the get_format callback which will supply the hardware context information), and once you require that anyway there isn't much point in parsing things twice for the same information.

It's probably fine to parse it twice if you want, but the two cases really should be returning the same information.

> v2: split decode header from decode_init, and call it for everyframe to
> detect format/resoultion change. It can fix some regression issues such
> as hevc 10bits decoding.
> 
> Signed-off-by: Zhong Li <zhong.li@intel.com>
> ---
>  libavcodec/qsvdec.c | 172 ++++++++++++++++++++++----------------------
>  libavcodec/qsvdec.h |   2 +
>  2 files changed, 90 insertions(+), 84 deletions(-)

- Mark
Carl Eugen Hoyos Feb. 21, 2019, 12:26 a.m. UTC | #3
2019-02-20 3:58 GMT+01:00, Zhong Li <zhong.li@intel.com>:

> And it is necessary for adding new qsv decoders such as
> MJPEG and VP9 since current parser can't provide
> enough information.

Just curious: What information is missing?

Carl Eugen
Zhong Li Feb. 21, 2019, 5:43 a.m. UTC | #4
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Mark Thompson

> Sent: Thursday, February 21, 2019 5:32 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH v2 3/6] lavc/qsvdec: Replace current

> parser with MFXVideoDECODE_DecodeHeader()

> 

> On 20/02/2019 02:58, Zhong Li wrote:

> > Using MSDK parser can improve qsv decoder pass rate in some cases (E.g:

> > sps declares a wrong level_idc, smaller than it should be).

> > And it is necessary for adding new qsv decoders such as MJPEG and VP9

> > since current parser can't provide enough information.

>

> Can you explain the problem with level_idc?  Why would the libmfx parser

> determine a different answer?


Detail discussion is here: https://github.com/Intel-Media-SDK/MediaSDK/issues/582 
"Some clips declare a wrong level_idc, smaller than it should be", for example, a clip declare a level_idc= 1.0, but other sps/pps paramters such as resolution or reference number is out of scope. 
I believe this is a very common issue, many clips don't declare a correct level, and it should be decoded with decoder error detecting.
Currently internal parser is just reading what is the level_idc is, there is no error handing. Thus making MSDK decoding error

> Given that you need the current parser anyway (see previous mail), it would

> likely be more useful to extend it to supply any information which is missing.

> 

> > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> > merged as commit 1acb19d, but was overwritten when merged libav

> patches (commit: 1f26a23) without any explain.

> 

> I'm not sure where the explanation for this went; maybe it was only

> discussed on IRC.

> 

> The reason for using the internal parsers is that you need the information

> before libmfx is initialized at all in the hw_frames_ctx case (i.e. before the

> get_format callback which will supply the hardware context information),

> and once you require that anyway there isn't much point in parsing things

> twice for the same information.


As I see, there are very limited information needed before init a libmfx session (we must <and only need> init a libmfx session before call MFXVideoDECODE_DecodeHeader() ): There are resolution and pix_fmt. 
As you can see from my current implementation, I don't call internal parser before init the session, we are assuming a resolution (It may be provided from libavformat, but not must as Hendrik's comment) and pix_fmt (we assume it is NV12), and will correct it after MFXVideoDECODE_DecodeHeader().
It probably means we need to init the session twice for the first decoding call (e.g hevc/vp9 10bit clips, or resolution is not provided somewhere such as libavformat), but it is just happens when the first call (if header information is not changed after that) and the assumed resolution/pix_fmt is not correct.  
I think it is higher efficient than parse twice for every decoding calling, and it is not a workaround way since we still need to handling resolution/pix_fmt changing cases. 

> It's probably fine to parse it twice if you want, but the two cases really

> should be returning the same information.
Zhong Li Feb. 21, 2019, 8:19 a.m. UTC | #5
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Carl Eugen Hoyos

> Sent: Thursday, February 21, 2019 8:26 AM

> To: FFmpeg development discussions and patches

> <ffmpeg-devel@ffmpeg.org>

> Subject: Re: [FFmpeg-devel] [PATCH v2 3/6] lavc/qsvdec: Replace current

> parser with MFXVideoDECODE_DecodeHeader()

> 

> 2019-02-20 3:58 GMT+01:00, Zhong Li <zhong.li@intel.com>:

> 

> > And it is necessary for adding new qsv decoders such as MJPEG and VP9

> > since current parser can't provide enough information.

> 

> Just curious: What information is missing?

> 

> Carl Eugen


To init MSDK decoder, width/height/pix_fmt/picture_struct/level/profile are needed.
But currently, mjpeg_parser.c provides nothing of them. VP9 also provides very few information. 

If we want to further discussion about adding the missing information, probably we can continue in Mark's thread.
Zhong Li Feb. 21, 2019, 9:03 a.m. UTC | #6
> From: Rogozhkin, Dmitry V

> Sent: Thursday, February 21, 2019 3:14 AM

> To: ffmpeg-devel@ffmpeg.org

> Cc: Li, Zhong <zhong.li@intel.com>

> Subject: Re: [FFmpeg-devel] [PATCH v2 3/6] lavc/qsvdec: Replace current

> parser with MFXVideoDECODE_DecodeHeader()

> 

> On Wed, 2019-02-20 at 10:58 +0800, Zhong Li wrote:

> > Using MSDK parser can improve qsv decoder pass rate in some cases

> > (E.g:

> > sps declares a wrong level_idc, smaller than it should be).

> > And it is necessary for adding new qsv decoders such as MJPEG and VP9

> > since current parser can't provide enough information.

> > Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> > https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> > merged as commit 1acb19d, but was overwritten when merged libav

> > patches (commit: 1f26a23) without any explain.

> >

> > v2: split decode header from decode_init, and call it for everyframe

> > to detect format/resoultion change. It can fix some regression issues

> > such as hevc 10bits decoding.

> >

> > Signed-off-by: Zhong Li <zhong.li@intel.com>

> > ---

> >  libavcodec/qsvdec.c | 172 ++++++++++++++++++++++------------------

> > ----

> >  libavcodec/qsvdec.h |   2 +

> >  2 files changed, 90 insertions(+), 84 deletions(-)

> >

> > diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c index

> > 4a0be811fb..efe054f5c5 100644

> > --- a/libavcodec/qsvdec.c

> > +++ b/libavcodec/qsvdec.c

> > @@ -120,19 +120,18 @@ static inline unsigned int qsv_fifo_size(const

> > AVFifoBuffer* fifo)

> >      return av_fifo_size(fifo) / qsv_fifo_item_size();

> >  }

> >

> > -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> > +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q,

> > enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> >  {

> > -    const AVPixFmtDescriptor *desc;

> >      mfxSession session = NULL;

> >      int iopattern = 0;

> > -    mfxVideoParam param = { 0 };

> > -    int frame_width  = avctx->coded_width;

> > -    int frame_height = avctx->coded_height;

> >      int ret;

> >

> > -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> > -    if (!desc)

> > -        return AVERROR_BUG;

> > +    ret = ff_get_format(avctx, pix_fmts);

> > +    if (ret < 0) {

> > +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> > +        return ret;

> > +    } else

> > +        q->orig_pix_fmt = pix_fmts[1];

> 

> You rely on the way how pix_fmts will be declared in some other place.

> This gives issues to understand the code for the beginners and hence

> difficulty in code support. Can we somehow show what is actually going on

> here? For example, declare pix_fmts something like:

> enum qsv_formats {

>   QSV,

>   QSV_NV12,

>   QSV_MAX

> }

> 

> enum AVPixelFormat pix_fmts[QSV_MAX+1] = {

>   [QSV] = AV_PIX_FMT_QSV,

>   [QSV_NV12] = AV_PIX_FMT_NV12,

>   [QSV_MAX] = AV_PIX_FMT_NONE

> }

> 

> After that we could address like:

>         q->orig_pix_fmt = pix_fmts[QSV_NV12]; both highlighting what

> we actually want to achieve and making sure that we will have less mistakes

> supporting the list of fmts wish we reorder something.


QSV sw_pix_fmt is not always NV12. For 10bit clips, we just assume it is NV12, after decoder_header called, it will be updated as P010.

> >

> >      if (!q->async_fifo) {

> >          q->async_fifo = av_fifo_alloc(q->async_depth *

> > qsv_fifo_item_size()); @@ -170,48 +169,72 @@ static int

> > qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> >          return ret;

> >      }

> >

> > -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > -    if (ret < 0)

> > -        return ret;

> > +    param->IOPattern   = q->iopattern;

> > +    param->AsyncDepth  = q->async_depth;

> > +    param->ExtParam    = q->ext_buffers;

> > +    param->NumExtParam = q->nb_ext_buffers;

> >

> > -    param.mfx.CodecId      = ret;

> > -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id,

> > avctx->profile);

> > -    param.mfx.CodecLevel   = avctx->level ==

> FF_LEVEL_UNKNOWN ?

> > MFX_LEVEL_UNKNOWN : avctx->level;

> > -

> > -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> > -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> > -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth >

> 8;

> > -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> > -    param.mfx.FrameInfo.Width          = frame_width;

> > -    param.mfx.FrameInfo.Height         = frame_height;

> > -    param.mfx.FrameInfo.ChromaFormat   =

> MFX_CHROMAFORMAT_YUV420;

> > -

> > -    switch (avctx->field_order) {

> > -    case AV_FIELD_PROGRESSIVE:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_PROGRESSIVE;

> > -        break;

> > -    case AV_FIELD_TT:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_FIELD_TFF;

> > -        break;

> > -    case AV_FIELD_BB:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_FIELD_BFF;

> > -        break;

> > -    default:

> > -        param.mfx.FrameInfo.PicStruct =

> MFX_PICSTRUCT_UNKNOWN;

> > -        break;

> > -    }

> > +    return 0;

> > + }

> > +

> > +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> > mfxVideoParam *param)

> > +{

> > +    int ret;

> >

> > -    param.IOPattern   = q->iopattern;

> > -    param.AsyncDepth  = q->async_depth;

> > -    param.ExtParam    = q->ext_buffers;

> > -    param.NumExtParam = q->nb_ext_buffers;

> > +    avctx->width        = param->mfx.FrameInfo.CropW;

> > +    avctx->height       = param->mfx.FrameInfo.CropH;

> > +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> > +    avctx->coded_height = param->mfx.FrameInfo.Height;

> > +    avctx->level        = param->mfx.CodecLevel;

> > +    avctx->profile      = param->mfx.CodecProfile;

> > +    avctx->field_order  = ff_qsv_map_picstruct(param-

> > >mfx.FrameInfo.PicStruct);

> > +    avctx->pix_fmt      = ff_qsv_map_fourcc(param-

> > >mfx.FrameInfo.FourCC);

> >

> > -    ret = MFXVideoDECODE_Init(q->session, &param);

> > +    ret = MFXVideoDECODE_Init(q->session, param);

> >      if (ret < 0)

> >          return ff_qsv_print_error(avctx, ret,

> >                                    "Error initializing the

> MFX video

> > decoder");

> >

> > -    q->frame_info = param.mfx.FrameInfo;

> > +    q->frame_info = param->mfx.FrameInfo;

> > +

> > +    return 0;

> > +}

> > +

> > +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q,

> > AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> > +{

> > +    int ret;

> > +

> > +    mfxBitstream bs = { { { 0 } } };

> > +

> > +    if (avpkt->size) {

> > +        bs.Data       = avpkt->data;

> > +        bs.DataLength = avpkt->size;

> > +        bs.MaxLength  = bs.DataLength;

> > +        bs.TimeStamp  = avpkt->pts;

> > +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> > +            bs.DataFlag   |=

> MFX_BITSTREAM_COMPLETE_FRAME;

> > +    } else

> > +        return AVERROR_INVALIDDATA;

> > +

> > +

> > +    if(!q->session) {

> > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> > +        if (ret < 0)

> > +            return ret;

> > +    }

> > +

> > +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> > +    if (ret < 0)

> > +        return ret;

> > +

> > +    param->mfx.CodecId = ret;

> > +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> > +    if (MFX_ERR_MORE_DATA == ret) {

> > +       return AVERROR(EAGAIN);

> > +    }

> > +    if (ret < 0)

> > +        return ff_qsv_print_error(avctx, ret,

> > +                "Error decoding stream header");

> >

> >      return 0;

> >  }

> > @@ -494,7 +517,11 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> > QSVContext *q,

> >      uint8_t *dummy_data;

> >      int dummy_size;

> >      int ret;

> > -    const AVPixFmtDescriptor *desc;

> > +    mfxVideoParam param = { 0 };

> > +    static enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> >

> +                                              AV_PI

> X_FMT_NV12,

> >

> +                                              AV_PI

> X_FMT_NONE };

> > +

> 

> I am surprised to see 'static'. Why?

>

> >

> >      if (!q->avctx_internal) {

> >          q->avctx_internal = avcodec_alloc_context3(NULL); @@

> -521,15

> > +548,13 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext

> > *q,

> >                       pkt->data, pkt->size, pkt->pts, pkt->dts,

> >                       pkt->pos);

> >

> > -    avctx->field_order  = q->parser->field_order;

> >      /* TODO: flush delayed frames on reinit */

> > -    if (q->parser->format       != q->orig_pix_fmt    ||

> > -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> > >coded_width, 16) ||

> > -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> > >coded_height, 16)) {

> > -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> >

> -                                           AV_PIX_F

> MT_NONE,

> >

> -                                           AV_PIX_F

> MT_NONE };

> > -        enum AVPixelFormat qsv_format;

> > +

> > +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> > +

> > +    if (ret >= 0 && (q->orig_pix_fmt !=

> > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> > +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> > +        avctx->coded_height != param.mfx.FrameInfo.Height)) {

> >          AVPacket zero_pkt = {0};

> >

> >          if (q->buffered_count) {

> > @@ -538,45 +563,23 @@ int ff_qsv_process_data(AVCodecContext

> *avctx,

> > QSVContext *q,

> >              q->buffered_count--;

> >              return qsv_decode(avctx, q, frame, got_frame,

> &zero_pkt);

> >          }

> > -

> >          q->reinit_flag = 0;

> >

> > -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q-

> > >fourcc);

> > -        if (qsv_format < 0) {

> > -            av_log(avctx, AV_LOG_ERROR,

> > -                   "Decoding pixel format '%s' is not

> supported\n",

> > -                   av_get_pix_fmt_name(q->parser->format));

> > -            ret = AVERROR(ENOSYS);

> > -            goto reinit_fail;

> > -        }

> > -

> > -        q->orig_pix_fmt     = q->parser->format;

> > -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> > -        avctx->width        = q->parser->width;

> > -        avctx->height       = q->parser->height;

> > -        avctx->coded_width  = FFALIGN(q->parser->coded_width,

> 16);

> > -        avctx->coded_height = FFALIGN(q->parser->coded_height,

> 16);

> > -        avctx->level        = q->avctx_internal->level;

> > -        avctx->profile      = q->avctx_internal->profile;

> > -

> > -        ret = ff_get_format(avctx, pix_fmts);

> > +        avctx->pix_fmt = pix_fmts[1]  =

> > ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> 

> Here you modify pix_fmts which is declared static. If someone will use QSV

> decoder few times in the same application in parallel he might experience a

> race condition, isn't it?


Good question. Declare it as static is just for avoiding unnecessary reinit a session.
Take hevc 10bits clips as example, we assume pix_fmts[1] is NV12 and then init a session and decode_header, after that, we know it is not NV12 but is P010,
Then we need to reinit a session. If it is static, this reinit action is only needed for the first decoding call, else we need to reinit for every frame.
However, you gave me a good remind to give another solution: remove the static keywords, and set pix_fmts[1] = q->orig_pix_fmt (and relative code should be refined too).

> > +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> > +        avctx->coded_height = param.mfx.FrameInfo.Height;

> > +

> > +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

> >          if (ret < 0)

> >              goto reinit_fail;

> > +        q->initialized = 0;

> > +    }

> >

> > -        avctx->pix_fmt = ret;

> > -

> > -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> > -        if (!desc)

> > -            goto reinit_fail;

> > -

> > -         if (desc->comp[0].depth > 8) {

> > -            avctx->coded_width

> =  FFALIGN(q->parser->coded_width,

> > 32);

> > -            avctx->coded_height =

> FFALIGN(q->parser->coded_height,

> > 32);

> > -        }

> > -

> > -        ret = qsv_decode_init(avctx, q);

> > +    if (!q->initialized) {

> > +        ret = qsv_decode_init(avctx, q, &param);

> >          if (ret < 0)

> >              goto reinit_fail;

> > +        q->initialized = 1;

> >      }

> >

> >      return qsv_decode(avctx, q, frame, got_frame, pkt); @@ -589,4

> > +592,5 @@ reinit_fail:

> >  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)

> >  {

> >      q->orig_pix_fmt = AV_PIX_FMT_NONE;

> > +    q->initialized = 0;

> >  }

> > diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h index

> > 111536caba..4812fb2a6b 100644

> > --- a/libavcodec/qsvdec.h

> > +++ b/libavcodec/qsvdec.h

> > @@ -63,6 +63,8 @@ typedef struct QSVContext {

> >      uint32_t fourcc;

> >      mfxFrameInfo frame_info;

> >

> > +    int initialized;

> > +

> >      // options set by the caller

> >      int async_depth;

> >      int iopattern;
Fu, Linjie Feb. 22, 2019, 3:30 a.m. UTC | #7
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Zhong Li

> Sent: Wednesday, February 20, 2019 10:58

> To: ffmpeg-devel@ffmpeg.org

> Cc: Li, Zhong <zhong.li@intel.com>

> Subject: [FFmpeg-devel] [PATCH v2 3/6] lavc/qsvdec: Replace current parser

> with MFXVideoDECODE_DecodeHeader()

> 

> Using MSDK parser can improve qsv decoder pass rate in some cases (E.g:

> sps declares a wrong level_idc, smaller than it should be).

> And it is necessary for adding new qsv decoders such as MJPEG and VP9

> since current parser can't provide enough information.

> Actually using MFXVideoDECODE_DecodeHeader() was disscussed at

> https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and

> merged as commit 1acb19d,

> but was overwritten when merged libav patches (commit: 1f26a23) without

> any explain.

> 

> v2: split decode header from decode_init, and call it for everyframe to

> detect format/resoultion change. It can fix some regression issues such

> as hevc 10bits decoding.

> 

> Signed-off-by: Zhong Li <zhong.li@intel.com>

> ---

>  libavcodec/qsvdec.c | 172 ++++++++++++++++++++++----------------------

>  libavcodec/qsvdec.h |   2 +

>  2 files changed, 90 insertions(+), 84 deletions(-)

> 

> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c

> index 4a0be811fb..efe054f5c5 100644

> --- a/libavcodec/qsvdec.c

> +++ b/libavcodec/qsvdec.c

> @@ -120,19 +120,18 @@ static inline unsigned int qsv_fifo_size(const

> AVFifoBuffer* fifo)

>      return av_fifo_size(fifo) / qsv_fifo_item_size();

>  }

> 

> -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)

> +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q,

> enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

>  {

> -    const AVPixFmtDescriptor *desc;

>      mfxSession session = NULL;

>      int iopattern = 0;

> -    mfxVideoParam param = { 0 };

> -    int frame_width  = avctx->coded_width;

> -    int frame_height = avctx->coded_height;

>      int ret;

> 

> -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

> -    if (!desc)

> -        return AVERROR_BUG;

> +    ret = ff_get_format(avctx, pix_fmts);

> +    if (ret < 0) {

> +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;

> +        return ret;

> +    } else

> +        q->orig_pix_fmt = pix_fmts[1];

> 

>      if (!q->async_fifo) {

>          q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());

> @@ -170,48 +169,72 @@ static int qsv_decode_init(AVCodecContext *avctx,

> QSVContext *q)

>          return ret;

>      }

> 

> -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> -    if (ret < 0)

> -        return ret;

> +    param->IOPattern   = q->iopattern;

> +    param->AsyncDepth  = q->async_depth;

> +    param->ExtParam    = q->ext_buffers;

> +    param->NumExtParam = q->nb_ext_buffers;

> 

> -    param.mfx.CodecId      = ret;

> -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id, avctx-

> >profile);

> -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ?

> MFX_LEVEL_UNKNOWN : avctx->level;

> -

> -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;

> -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;

> -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;

> -    param.mfx.FrameInfo.FourCC         = q->fourcc;

> -    param.mfx.FrameInfo.Width          = frame_width;

> -    param.mfx.FrameInfo.Height         = frame_height;

> -    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;

> -

> -    switch (avctx->field_order) {

> -    case AV_FIELD_PROGRESSIVE:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;

> -        break;

> -    case AV_FIELD_TT:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;

> -        break;

> -    case AV_FIELD_BB:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;

> -        break;

> -    default:

> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;

> -        break;

> -    }

> +    return 0;

> + }

> +

> +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q,

> mfxVideoParam *param)

> +{

> +    int ret;

> 

> -    param.IOPattern   = q->iopattern;

> -    param.AsyncDepth  = q->async_depth;

> -    param.ExtParam    = q->ext_buffers;

> -    param.NumExtParam = q->nb_ext_buffers;

> +    avctx->width        = param->mfx.FrameInfo.CropW;

> +    avctx->height       = param->mfx.FrameInfo.CropH;

> +    avctx->coded_width  = param->mfx.FrameInfo.Width;

> +    avctx->coded_height = param->mfx.FrameInfo.Height;

> +    avctx->level        = param->mfx.CodecLevel;

> +    avctx->profile      = param->mfx.CodecProfile;

> +    avctx->field_order  = ff_qsv_map_picstruct(param-

> >mfx.FrameInfo.PicStruct);

> +    avctx->pix_fmt      = ff_qsv_map_fourcc(param->mfx.FrameInfo.FourCC);

> 

> -    ret = MFXVideoDECODE_Init(q->session, &param);

> +    ret = MFXVideoDECODE_Init(q->session, param);

>      if (ret < 0)

>          return ff_qsv_print_error(avctx, ret,

>                                    "Error initializing the MFX video decoder");

> 

> -    q->frame_info = param.mfx.FrameInfo;

> +    q->frame_info = param->mfx.FrameInfo;

> +

> +    return 0;

> +}

> +

> +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q,

> AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)

> +{

> +    int ret;

> +

> +    mfxBitstream bs = { { { 0 } } };

> +

> +    if (avpkt->size) {

> +        bs.Data       = avpkt->data;

> +        bs.DataLength = avpkt->size;

> +        bs.MaxLength  = bs.DataLength;

> +        bs.TimeStamp  = avpkt->pts;

> +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)

> +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;

> +    } else

> +        return AVERROR_INVALIDDATA;

> +

> +

> +    if(!q->session) {

> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);

> +        if (ret < 0)

> +            return ret;

> +    }

> +

> +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);

> +    if (ret < 0)

> +        return ret;

> +

> +    param->mfx.CodecId = ret;

> +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);

> +    if (MFX_ERR_MORE_DATA == ret) {

> +       return AVERROR(EAGAIN);

> +    }

> +    if (ret < 0)

> +        return ff_qsv_print_error(avctx, ret,

> +                "Error decoding stream header");

> 

>      return 0;

>  }

> @@ -494,7 +517,11 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>      uint8_t *dummy_data;

>      int dummy_size;

>      int ret;

> -    const AVPixFmtDescriptor *desc;

> +    mfxVideoParam param = { 0 };

> +    static enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> +                                              AV_PIX_FMT_NV12,

> +                                              AV_PIX_FMT_NONE };

> +

> 

>      if (!q->avctx_internal) {

>          q->avctx_internal = avcodec_alloc_context3(NULL);

> @@ -521,15 +548,13 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>                       pkt->data, pkt->size, pkt->pts, pkt->dts,

>                       pkt->pos);

> 

> -    avctx->field_order  = q->parser->field_order;

>      /* TODO: flush delayed frames on reinit */

> -    if (q->parser->format       != q->orig_pix_fmt    ||

> -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx-

> >coded_width, 16) ||

> -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx-

> >coded_height, 16)) {

> -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,

> -                                           AV_PIX_FMT_NONE,

> -                                           AV_PIX_FMT_NONE };

> -        enum AVPixelFormat qsv_format;

> +

> +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);

> +

> +    if (ret >= 0 && (q->orig_pix_fmt !=

> ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||

> +        avctx->coded_width  != param.mfx.FrameInfo.Width ||

> +        avctx->coded_height != param.mfx.FrameInfo.Height)) {


Verified in the small resolution change cases:
MSDK parser doesn't expose the parsed pic_width and pic_height directly in FrameInfo.Width and FrameInfo.Height, they are all 16 aligned values, see:
https://github.com/Intel-Media-SDK/MediaSDK/blob/99b9eb402aeabad20636462b954bb7a9408fadfd/_studio/shared/umc/codec/h265_dec/src/umc_h265_mfx_utils.cpp#L308
So it will still result in garbage.

seq->pic_width_in_luma_samples and seq->pic_height_in_luma_samples should be exposed in some way to detect the small resolution change.

>          AVPacket zero_pkt = {0};

> 

>          if (q->buffered_count) {

> @@ -538,45 +563,23 @@ int ff_qsv_process_data(AVCodecContext *avctx,

> QSVContext *q,

>              q->buffered_count--;

>              return qsv_decode(avctx, q, frame, got_frame, &zero_pkt);

>          }

> -

>          q->reinit_flag = 0;

> 

> -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q->fourcc);

> -        if (qsv_format < 0) {

> -            av_log(avctx, AV_LOG_ERROR,

> -                   "Decoding pixel format '%s' is not supported\n",

> -                   av_get_pix_fmt_name(q->parser->format));

> -            ret = AVERROR(ENOSYS);

> -            goto reinit_fail;

> -        }

> -

> -        q->orig_pix_fmt     = q->parser->format;

> -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;

> -        avctx->width        = q->parser->width;

> -        avctx->height       = q->parser->height;

> -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);

> -        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);

> -        avctx->level        = q->avctx_internal->level;

> -        avctx->profile      = q->avctx_internal->profile;

> -

> -        ret = ff_get_format(avctx, pix_fmts);

> +        avctx->pix_fmt = pix_fmts[1]  =

> ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);

> +        avctx->coded_width  = param.mfx.FrameInfo.Width;

> +        avctx->coded_height = param.mfx.FrameInfo.Height;

> +

> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);

>          if (ret < 0)

>              goto reinit_fail;

> +        q->initialized = 0;

> +    }

> 

> -        avctx->pix_fmt = ret;

> -

> -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);

> -        if (!desc)

> -            goto reinit_fail;

> -

> -         if (desc->comp[0].depth > 8) {

> -            avctx->coded_width =  FFALIGN(q->parser->coded_width, 32);

> -            avctx->coded_height = FFALIGN(q->parser->coded_height, 32);

> -        }

> -

> -        ret = qsv_decode_init(avctx, q);

> +    if (!q->initialized) {

> +        ret = qsv_decode_init(avctx, q, &param);

>          if (ret < 0)

>              goto reinit_fail;

> +        q->initialized = 1;

>      }

> 

>      return qsv_decode(avctx, q, frame, got_frame, pkt);

> @@ -589,4 +592,5 @@ reinit_fail:

>  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)

>  {

>      q->orig_pix_fmt = AV_PIX_FMT_NONE;

> +    q->initialized = 0;

>  }

> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h

> index 111536caba..4812fb2a6b 100644

> --- a/libavcodec/qsvdec.h

> +++ b/libavcodec/qsvdec.h

> @@ -63,6 +63,8 @@ typedef struct QSVContext {

>      uint32_t fourcc;

>      mfxFrameInfo frame_info;

> 

> +    int initialized;

> +

>      // options set by the caller

>      int async_depth;

>      int iopattern;

> --

> 2.17.1

> 

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
diff mbox

Patch

diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index 4a0be811fb..efe054f5c5 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -120,19 +120,18 @@  static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
     return av_fifo_size(fifo) / qsv_fifo_item_size();
 }
 
-static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
+static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
 {
-    const AVPixFmtDescriptor *desc;
     mfxSession session = NULL;
     int iopattern = 0;
-    mfxVideoParam param = { 0 };
-    int frame_width  = avctx->coded_width;
-    int frame_height = avctx->coded_height;
     int ret;
 
-    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
-    if (!desc)
-        return AVERROR_BUG;
+    ret = ff_get_format(avctx, pix_fmts);
+    if (ret < 0) {
+        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;
+        return ret;
+    } else
+        q->orig_pix_fmt = pix_fmts[1];
 
     if (!q->async_fifo) {
         q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
@@ -170,48 +169,72 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
         return ret;
     }
 
-    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
-    if (ret < 0)
-        return ret;
+    param->IOPattern   = q->iopattern;
+    param->AsyncDepth  = q->async_depth;
+    param->ExtParam    = q->ext_buffers;
+    param->NumExtParam = q->nb_ext_buffers;
 
-    param.mfx.CodecId      = ret;
-    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id, avctx->profile);
-    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ? MFX_LEVEL_UNKNOWN : avctx->level;
-
-    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;
-    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;
-    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;
-    param.mfx.FrameInfo.FourCC         = q->fourcc;
-    param.mfx.FrameInfo.Width          = frame_width;
-    param.mfx.FrameInfo.Height         = frame_height;
-    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
-
-    switch (avctx->field_order) {
-    case AV_FIELD_PROGRESSIVE:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
-        break;
-    case AV_FIELD_TT:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;
-        break;
-    case AV_FIELD_BB:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;
-        break;
-    default:
-        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;
-        break;
-    }
+    return 0;
+ }
+
+static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, mfxVideoParam *param)
+{
+    int ret;
 
-    param.IOPattern   = q->iopattern;
-    param.AsyncDepth  = q->async_depth;
-    param.ExtParam    = q->ext_buffers;
-    param.NumExtParam = q->nb_ext_buffers;
+    avctx->width        = param->mfx.FrameInfo.CropW;
+    avctx->height       = param->mfx.FrameInfo.CropH;
+    avctx->coded_width  = param->mfx.FrameInfo.Width;
+    avctx->coded_height = param->mfx.FrameInfo.Height;
+    avctx->level        = param->mfx.CodecLevel;
+    avctx->profile      = param->mfx.CodecProfile;
+    avctx->field_order  = ff_qsv_map_picstruct(param->mfx.FrameInfo.PicStruct);
+    avctx->pix_fmt      = ff_qsv_map_fourcc(param->mfx.FrameInfo.FourCC);
 
-    ret = MFXVideoDECODE_Init(q->session, &param);
+    ret = MFXVideoDECODE_Init(q->session, param);
     if (ret < 0)
         return ff_qsv_print_error(avctx, ret,
                                   "Error initializing the MFX video decoder");
 
-    q->frame_info = param.mfx.FrameInfo;
+    q->frame_info = param->mfx.FrameInfo;
+
+    return 0;
+}
+
+static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
+{
+    int ret;
+
+    mfxBitstream bs = { { { 0 } } };
+
+    if (avpkt->size) {
+        bs.Data       = avpkt->data;
+        bs.DataLength = avpkt->size;
+        bs.MaxLength  = bs.DataLength;
+        bs.TimeStamp  = avpkt->pts;
+        if (avctx->field_order == AV_FIELD_PROGRESSIVE)
+            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;
+    } else
+        return AVERROR_INVALIDDATA;
+
+
+    if(!q->session) {
+        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);
+        if (ret < 0)
+            return ret;
+    }
+
+    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
+    if (ret < 0)
+        return ret;
+
+    param->mfx.CodecId = ret;
+    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);
+    if (MFX_ERR_MORE_DATA == ret) {
+       return AVERROR(EAGAIN);
+    }
+    if (ret < 0)
+        return ff_qsv_print_error(avctx, ret,
+                "Error decoding stream header");
 
     return 0;
 }
@@ -494,7 +517,11 @@  int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
     uint8_t *dummy_data;
     int dummy_size;
     int ret;
-    const AVPixFmtDescriptor *desc;
+    mfxVideoParam param = { 0 };
+    static enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
+                                              AV_PIX_FMT_NV12,
+                                              AV_PIX_FMT_NONE };
+
 
     if (!q->avctx_internal) {
         q->avctx_internal = avcodec_alloc_context3(NULL);
@@ -521,15 +548,13 @@  int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
                      pkt->data, pkt->size, pkt->pts, pkt->dts,
                      pkt->pos);
 
-    avctx->field_order  = q->parser->field_order;
     /* TODO: flush delayed frames on reinit */
-    if (q->parser->format       != q->orig_pix_fmt    ||
-        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx->coded_width, 16) ||
-        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx->coded_height, 16)) {
-        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
-                                           AV_PIX_FMT_NONE,
-                                           AV_PIX_FMT_NONE };
-        enum AVPixelFormat qsv_format;
+
+    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);
+
+    if (ret >= 0 && (q->orig_pix_fmt != ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||
+        avctx->coded_width  != param.mfx.FrameInfo.Width ||
+        avctx->coded_height != param.mfx.FrameInfo.Height)) {
         AVPacket zero_pkt = {0};
 
         if (q->buffered_count) {
@@ -538,45 +563,23 @@  int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
             q->buffered_count--;
             return qsv_decode(avctx, q, frame, got_frame, &zero_pkt);
         }
-
         q->reinit_flag = 0;
 
-        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q->fourcc);
-        if (qsv_format < 0) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Decoding pixel format '%s' is not supported\n",
-                   av_get_pix_fmt_name(q->parser->format));
-            ret = AVERROR(ENOSYS);
-            goto reinit_fail;
-        }
-
-        q->orig_pix_fmt     = q->parser->format;
-        avctx->pix_fmt      = pix_fmts[1] = qsv_format;
-        avctx->width        = q->parser->width;
-        avctx->height       = q->parser->height;
-        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);
-        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);
-        avctx->level        = q->avctx_internal->level;
-        avctx->profile      = q->avctx_internal->profile;
-
-        ret = ff_get_format(avctx, pix_fmts);
+        avctx->pix_fmt = pix_fmts[1]  = ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);
+        avctx->coded_width  = param.mfx.FrameInfo.Width;
+        avctx->coded_height = param.mfx.FrameInfo.Height;
+        
+        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);
         if (ret < 0)
             goto reinit_fail;
+        q->initialized = 0;
+    }
 
-        avctx->pix_fmt = ret;
-
-        desc = av_pix_fmt_desc_get(avctx->pix_fmt);
-        if (!desc)
-            goto reinit_fail;
-
-         if (desc->comp[0].depth > 8) {
-            avctx->coded_width =  FFALIGN(q->parser->coded_width, 32);
-            avctx->coded_height = FFALIGN(q->parser->coded_height, 32);
-        }
-
-        ret = qsv_decode_init(avctx, q);
+    if (!q->initialized) {
+        ret = qsv_decode_init(avctx, q, &param);
         if (ret < 0)
             goto reinit_fail;
+        q->initialized = 1;
     }
 
     return qsv_decode(avctx, q, frame, got_frame, pkt);
@@ -589,4 +592,5 @@  reinit_fail:
 void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)
 {
     q->orig_pix_fmt = AV_PIX_FMT_NONE;
+    q->initialized = 0;
 }
diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
index 111536caba..4812fb2a6b 100644
--- a/libavcodec/qsvdec.h
+++ b/libavcodec/qsvdec.h
@@ -63,6 +63,8 @@  typedef struct QSVContext {
     uint32_t fourcc;
     mfxFrameInfo frame_info;
 
+    int initialized;
+
     // options set by the caller
     int async_depth;
     int iopattern;