diff mbox series

[FFmpeg-devel] avcodec/libx264: fix extradata when config annexb=0

Message ID tencent_A60DA70CB8B31675CD4220734D944FD3D507@qq.com
State New
Headers show
Series [FFmpeg-devel] avcodec/libx264: fix extradata when config annexb=0 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Zhao Zhili March 6, 2024, 1:35 p.m. UTC
From: Zhao Zhili <zhilizhao@tencent.com>

---
 configure            |   2 +-
 libavcodec/libx264.c | 148 ++++++++++++++++++++++++++++++++++++-------
 2 files changed, 125 insertions(+), 25 deletions(-)

Comments

Andreas Rheinhardt March 6, 2024, 2:18 p.m. UTC | #1
Zhao Zhili:
> From: Zhao Zhili <zhilizhao@tencent.com>
> 
> ---
>  configure            |   2 +-
>  libavcodec/libx264.c | 148 ++++++++++++++++++++++++++++++++++++-------
>  2 files changed, 125 insertions(+), 25 deletions(-)
> 
> diff --git a/configure b/configure
> index bbf1a70731..ef58c3fe97 100755
> --- a/configure
> +++ b/configure
> @@ -3491,7 +3491,7 @@ libwebp_encoder_deps="libwebp"
>  libwebp_anim_encoder_deps="libwebp"
>  libx262_encoder_deps="libx262"
>  libx264_encoder_deps="libx264"
> -libx264_encoder_select="atsc_a53"
> +libx264_encoder_select="atsc_a53 h264parse"
>  libx264rgb_encoder_deps="libx264"
>  libx264rgb_encoder_select="libx264_encoder"
>  libx265_encoder_deps="libx265"
> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
> index 10d646bd76..10dfda125d 100644
> --- a/libavcodec/libx264.c
> +++ b/libavcodec/libx264.c
> @@ -34,9 +34,12 @@
>  #include "avcodec.h"
>  #include "codec_internal.h"
>  #include "encode.h"
> +#include "get_bits.h"
> +#include "h264_ps.h"
>  #include "internal.h"
>  #include "packet_internal.h"
>  #include "atsc_a53.h"
> +#include "put_bits.h"
>  #include "sei.h"
>  
>  #include <x264.h>
> @@ -865,6 +868,124 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
>      return 0;
>  }
>  
> +static int set_avcc_extradata(AVCodecContext *avctx, x264_nal_t *nal, int nnal)
> +{
> +    x264_nal_t *sps_nal = NULL;
> +    x264_nal_t *pps_nal = NULL;
> +    GetBitContext gbc;
> +    PutBitContext pbc;
> +    H264ParamSets ps = { 0 };
> +    uint8_t *p;
> +    int ret, profile;
> +
> +    /* We know it's in the order of SPS/PPS/SEI, but it's not documented in x264 API.
> +     * The x264 param i_sps_id implies there is a single pair of SPS/PPS.
> +     */
> +    for (int i = 0; i < nnal; i++) {
> +        if (nal[i].i_type == NAL_SPS)
> +            sps_nal = &nal[i];
> +        else if (nal[i].i_type == NAL_PPS)
> +            pps_nal = &nal[i];
> +    }
> +    av_assert0(sps_nal);
> +    av_assert0(pps_nal);

We should never assert on the output of external libraries.

> +
> +    // +11 for AVCDecoderConfigurationRecord, will shrink to the real size finally
> +    avctx->extradata_size = sps_nal->i_payload + pps_nal->i_payload + 11;
> +    avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_MIN_SIZE);

AV_INPUT_BUFFER_PADDING_SIZE

> +    if (!avctx->extradata)
> +        return AVERROR(ENOMEM);
> +
> +    // Skip size part
> +    p = sps_nal->p_payload + 4;
> +    init_get_bits8(&gbc, p, sps_nal->i_payload - 4);
> +    skip_bits(&gbc, 8);
> +    ret = ff_h264_decode_seq_parameter_set(&gbc, avctx, &ps, 0);

This is completely overblown. You only read three fields: profile_idc,
the constraint fields and level_idc. All three are at the start of the
SPS and occupy a whole byte, so there is no need for a GetBitContext at all.
(Yes, I am aware of the hypothetical scenario in which we could have a
0x03 escape byte before level_idc (in case of a hypothetical future
profile zero), but even then using ff_h264_decode_seq_parameter_set() is
overblown.)

> +    if (ret < 0)
> +        return ret;
> +
> +    for (int i = 0; i < FF_ARRAY_ELEMS(ps.sps_list); i++) {
> +        if (ps.sps_list[i]) {
> +            ps.sps = ps.sps_list[i];
> +            break;
> +        }
> +    }
> +
> +    // Now create AVCDecoderConfigurationRecord
> +    init_put_bits(&pbc, avctx->extradata, avctx->extradata_size);
> +    put_bits(&pbc, 8, 1); // version
> +    put_bits(&pbc, 8, ps.sps->profile_idc); // AVCProfileIndication
> +    put_bits(&pbc, 8, ps.sps->constraint_set_flags); // profile_compatibility
> +    put_bits(&pbc, 8, ps.sps->level_idc); // AVCLevelIndication
> +    put_bits(&pbc, 11, 0x7FF);
> +
> +    p = sps_nal->p_payload + 4;
> +    put_bits(&pbc, 5, 1); // numOfSequenceParameterSets
> +    put_bits(&pbc, 16, sps_nal->i_payload - 4); // sequenceParameterSetLength

I also think that using a PutBitContext is overblown here (most fields
don't cross a byte-boundary, the bit-alignment of every field is fixed),
in particular if you copy the NALUs eight bits at a time:

> +    for (int i = 0; i < sps_nal->i_payload - 4; i++)
> +        put_bits(&pbc, 8, p[i]);
> +
> +    p = pps_nal->p_payload + 4;
> +    put_bits(&pbc, 8, 1); // numOfPictureParameterSets
> +    put_bits(&pbc, 16, pps_nal->i_payload - 4); // pictureParameterSetLength
> +    for (int i = 0; i < pps_nal->i_payload - 4; i++)
> +        put_bits(&pbc, 8, p[i]);
> +
> +    profile = ps.sps->profile_idc;
> +    if (profile != 66 && profile != 77 && profile != 88) {
> +        put_bits(&pbc, 6, 0x3F);
> +        put_bits(&pbc, 2, ps.sps->chroma_format_idc);
> +        put_bits(&pbc, 5, 0x1F);
> +        put_bits(&pbc, 3, ps.sps->bit_depth_luma - 8);
> +        put_bits(&pbc, 5, 0x1F);
> +        put_bits(&pbc, 3, ps.sps->bit_depth_chroma - 8);
> +        put_bits(&pbc, 8, 0);
> +    }
> +    flush_put_bits(&pbc);
> +    avctx->extradata_size = put_bytes_output(&pbc);
> +
> +    ff_h264_ps_uninit(&ps);
> +
> +    return 0;
> +}
> +
> +static int set_extradata(AVCodecContext *avctx)
> +{
> +    X264Context *x4 = avctx->priv_data;
> +    x264_nal_t *nal;
> +    uint8_t *p;
> +    int nnal, s;
> +
> +    s = x264_encoder_headers(x4->enc, &nal, &nnal);
> +    if (s < 0)
> +        return AVERROR_EXTERNAL;
> +
> +    if (!x4->params.b_annexb)
> +        return set_avcc_extradata(avctx, nal, nnal);
> +
> +    avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
> +    if (!p)
> +        return AVERROR(ENOMEM);
> +
> +    for (int i = 0; i < nnal; i++) {
> +        /* Don't put the SEI in extradata. */
> +        if (nal[i].i_type == NAL_SEI) {
> +            av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload + 25);
> +            x4->sei_size = nal[i].i_payload;
> +            x4->sei = av_malloc(x4->sei_size);
> +            if (!x4->sei)
> +                return AVERROR(ENOMEM);
> +            memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
> +            continue;
> +        }
> +        memcpy(p, nal[i].p_payload, nal[i].i_payload);
> +        p += nal[i].i_payload;
> +    }
> +    avctx->extradata_size = p - avctx->extradata;
> +
> +    return 0;
> +}
> +
>  #define PARSE_X264_OPT(name, var)\
>      if (x4->var && x264_param_parse(&x4->params, name, x4->var) < 0) {\
>          av_log(avctx, AV_LOG_ERROR, "Error parsing option '%s' with value '%s'.\n", name, x4->var);\
> @@ -1233,30 +1354,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
>          return AVERROR_EXTERNAL;
>  
>      if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
> -        x264_nal_t *nal;
> -        uint8_t *p;
> -        int nnal, s, i;
> -
> -        s = x264_encoder_headers(x4->enc, &nal, &nnal);
> -        avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
> -        if (!p)
> -            return AVERROR(ENOMEM);
> -
> -        for (i = 0; i < nnal; i++) {
> -            /* Don't put the SEI in extradata. */
> -            if (nal[i].i_type == NAL_SEI) {
> -                av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload+25);
> -                x4->sei_size = nal[i].i_payload;
> -                x4->sei      = av_malloc(x4->sei_size);
> -                if (!x4->sei)
> -                    return AVERROR(ENOMEM);
> -                memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
> -                continue;
> -            }
> -            memcpy(p, nal[i].p_payload, nal[i].i_payload);
> -            p += nal[i].i_payload;
> -        }
> -        avctx->extradata_size = p - avctx->extradata;
> +        ret = set_extradata(avctx);
> +        if (ret < 0)
> +            return ret;
>      }
>  
>      cpb_props = ff_encode_add_cpb_side_data(avctx);
Zhao Zhili March 6, 2024, 4 p.m. UTC | #2
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Andreas Rheinhardt
> Sent: 2024年3月6日 22:19
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH] avcodec/libx264: fix extradata when config annexb=0
> 
> Zhao Zhili:
> > From: Zhao Zhili <zhilizhao@tencent.com>
> >
> > ---
> >  configure            |   2 +-
> >  libavcodec/libx264.c | 148 ++++++++++++++++++++++++++++++++++++-------
> >  2 files changed, 125 insertions(+), 25 deletions(-)
> >
> > diff --git a/configure b/configure
> > index bbf1a70731..ef58c3fe97 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3491,7 +3491,7 @@ libwebp_encoder_deps="libwebp"
> >  libwebp_anim_encoder_deps="libwebp"
> >  libx262_encoder_deps="libx262"
> >  libx264_encoder_deps="libx264"
> > -libx264_encoder_select="atsc_a53"
> > +libx264_encoder_select="atsc_a53 h264parse"
> >  libx264rgb_encoder_deps="libx264"
> >  libx264rgb_encoder_select="libx264_encoder"
> >  libx265_encoder_deps="libx265"
> > diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
> > index 10d646bd76..10dfda125d 100644
> > --- a/libavcodec/libx264.c
> > +++ b/libavcodec/libx264.c
> > @@ -34,9 +34,12 @@
> >  #include "avcodec.h"
> >  #include "codec_internal.h"
> >  #include "encode.h"
> > +#include "get_bits.h"
> > +#include "h264_ps.h"
> >  #include "internal.h"
> >  #include "packet_internal.h"
> >  #include "atsc_a53.h"
> > +#include "put_bits.h"
> >  #include "sei.h"
> >
> >  #include <x264.h>
> > @@ -865,6 +868,124 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
> >      return 0;
> >  }
> >
> > +static int set_avcc_extradata(AVCodecContext *avctx, x264_nal_t *nal, int nnal)
> > +{
> > +    x264_nal_t *sps_nal = NULL;
> > +    x264_nal_t *pps_nal = NULL;
> > +    GetBitContext gbc;
> > +    PutBitContext pbc;
> > +    H264ParamSets ps = { 0 };
> > +    uint8_t *p;
> > +    int ret, profile;
> > +
> > +    /* We know it's in the order of SPS/PPS/SEI, but it's not documented in x264 API.
> > +     * The x264 param i_sps_id implies there is a single pair of SPS/PPS.
> > +     */
> > +    for (int i = 0; i < nnal; i++) {
> > +        if (nal[i].i_type == NAL_SPS)
> > +            sps_nal = &nal[i];
> > +        else if (nal[i].i_type == NAL_PPS)
> > +            pps_nal = &nal[i];
> > +    }
> > +    av_assert0(sps_nal);
> > +    av_assert0(pps_nal);
> 
> We should never assert on the output of external libraries.

Ok, will just remove the assert. I don't think a check is needed.

> 
> > +
> > +    // +11 for AVCDecoderConfigurationRecord, will shrink to the real size finally
> > +    avctx->extradata_size = sps_nal->i_payload + pps_nal->i_payload + 11;
> > +    avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_MIN_SIZE);
> 
> AV_INPUT_BUFFER_PADDING_SIZE

Oops.

> 
> > +    if (!avctx->extradata)
> > +        return AVERROR(ENOMEM);
> > +
> > +    // Skip size part
> > +    p = sps_nal->p_payload + 4;
> > +    init_get_bits8(&gbc, p, sps_nal->i_payload - 4);
> > +    skip_bits(&gbc, 8);
> > +    ret = ff_h264_decode_seq_parameter_set(&gbc, avctx, &ps, 0);
> 
> This is completely overblown. You only read three fields: profile_idc,
> the constraint fields and level_idc. All three are at the start of the
> SPS and occupy a whole byte, so there is no need for a GetBitContext at all.
> (Yes, I am aware of the hypothetical scenario in which we could have a
> 0x03 escape byte before level_idc (in case of a hypothetical future
> profile zero), but even then using ff_h264_decode_seq_parameter_set() is
> overblown.)

You missed chroma_format_idc, bit_depth_luma/bit_depth_chroma.
I can get that from pixel format, but there are a lot of them.

> 
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    for (int i = 0; i < FF_ARRAY_ELEMS(ps.sps_list); i++) {
> > +        if (ps.sps_list[i]) {
> > +            ps.sps = ps.sps_list[i];
> > +            break;
> > +        }
> > +    }
> > +
> > +    // Now create AVCDecoderConfigurationRecord
> > +    init_put_bits(&pbc, avctx->extradata, avctx->extradata_size);
> > +    put_bits(&pbc, 8, 1); // version
> > +    put_bits(&pbc, 8, ps.sps->profile_idc); // AVCProfileIndication
> > +    put_bits(&pbc, 8, ps.sps->constraint_set_flags); // profile_compatibility
> > +    put_bits(&pbc, 8, ps.sps->level_idc); // AVCLevelIndication
> > +    put_bits(&pbc, 11, 0x7FF);
> > +
> > +    p = sps_nal->p_payload + 4;
> > +    put_bits(&pbc, 5, 1); // numOfSequenceParameterSets
> > +    put_bits(&pbc, 16, sps_nal->i_payload - 4); // sequenceParameterSetLength
> 
> I also think that using a PutBitContext is overblown here (most fields
> don't cross a byte-boundary, the bit-alignment of every field is fixed),
> in particular if you copy the NALUs eight bits at a time:

It's readable, close related to the spec, and it counts how many bytes actually used
automatically. I choose it for these convenience over performance.

> 
> > +    for (int i = 0; i < sps_nal->i_payload - 4; i++)
> > +        put_bits(&pbc, 8, p[i]);
> > +
> > +    p = pps_nal->p_payload + 4;
> > +    put_bits(&pbc, 8, 1); // numOfPictureParameterSets
> > +    put_bits(&pbc, 16, pps_nal->i_payload - 4); // pictureParameterSetLength
> > +    for (int i = 0; i < pps_nal->i_payload - 4; i++)
> > +        put_bits(&pbc, 8, p[i]);
> > +
> > +    profile = ps.sps->profile_idc;
> > +    if (profile != 66 && profile != 77 && profile != 88) {
> > +        put_bits(&pbc, 6, 0x3F);
> > +        put_bits(&pbc, 2, ps.sps->chroma_format_idc);
> > +        put_bits(&pbc, 5, 0x1F);
> > +        put_bits(&pbc, 3, ps.sps->bit_depth_luma - 8);
> > +        put_bits(&pbc, 5, 0x1F);
> > +        put_bits(&pbc, 3, ps.sps->bit_depth_chroma - 8);
> > +        put_bits(&pbc, 8, 0);
> > +    }
> > +    flush_put_bits(&pbc);
> > +    avctx->extradata_size = put_bytes_output(&pbc);
> > +
> > +    ff_h264_ps_uninit(&ps);
> > +
> > +    return 0;
> > +}
> > +
> > +static int set_extradata(AVCodecContext *avctx)
> > +{
> > +    X264Context *x4 = avctx->priv_data;
> > +    x264_nal_t *nal;
> > +    uint8_t *p;
> > +    int nnal, s;
> > +
> > +    s = x264_encoder_headers(x4->enc, &nal, &nnal);
> > +    if (s < 0)
> > +        return AVERROR_EXTERNAL;
> > +
> > +    if (!x4->params.b_annexb)
> > +        return set_avcc_extradata(avctx, nal, nnal);
> > +
> > +    avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
> > +    if (!p)
> > +        return AVERROR(ENOMEM);
> > +
> > +    for (int i = 0; i < nnal; i++) {
> > +        /* Don't put the SEI in extradata. */
> > +        if (nal[i].i_type == NAL_SEI) {
> > +            av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload + 25);
> > +            x4->sei_size = nal[i].i_payload;
> > +            x4->sei = av_malloc(x4->sei_size);
> > +            if (!x4->sei)
> > +                return AVERROR(ENOMEM);
> > +            memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
> > +            continue;
> > +        }
> > +        memcpy(p, nal[i].p_payload, nal[i].i_payload);
> > +        p += nal[i].i_payload;
> > +    }
> > +    avctx->extradata_size = p - avctx->extradata;
> > +
> > +    return 0;
> > +}
> > +
> >  #define PARSE_X264_OPT(name, var)\
> >      if (x4->var && x264_param_parse(&x4->params, name, x4->var) < 0) {\
> >          av_log(avctx, AV_LOG_ERROR, "Error parsing option '%s' with value '%s'.\n", name, x4->var);\
> > @@ -1233,30 +1354,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
> >          return AVERROR_EXTERNAL;
> >
> >      if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
> > -        x264_nal_t *nal;
> > -        uint8_t *p;
> > -        int nnal, s, i;
> > -
> > -        s = x264_encoder_headers(x4->enc, &nal, &nnal);
> > -        avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
> > -        if (!p)
> > -            return AVERROR(ENOMEM);
> > -
> > -        for (i = 0; i < nnal; i++) {
> > -            /* Don't put the SEI in extradata. */
> > -            if (nal[i].i_type == NAL_SEI) {
> > -                av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload+25);
> > -                x4->sei_size = nal[i].i_payload;
> > -                x4->sei      = av_malloc(x4->sei_size);
> > -                if (!x4->sei)
> > -                    return AVERROR(ENOMEM);
> > -                memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
> > -                continue;
> > -            }
> > -            memcpy(p, nal[i].p_payload, nal[i].i_payload);
> > -            p += nal[i].i_payload;
> > -        }
> > -        avctx->extradata_size = p - avctx->extradata;
> > +        ret = set_extradata(avctx);
> > +        if (ret < 0)
> > +            return ret;
> >      }
> >
> >      cpb_props = ff_encode_add_cpb_side_data(avctx);
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Andreas Rheinhardt March 6, 2024, 4:23 p.m. UTC | #3
Zhao Zhili:
> 
> 
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Andreas Rheinhardt
>> Sent: 2024年3月6日 22:19
>> To: ffmpeg-devel@ffmpeg.org
>> Subject: Re: [FFmpeg-devel] [PATCH] avcodec/libx264: fix extradata when config annexb=0
>>
>> Zhao Zhili:
>>> From: Zhao Zhili <zhilizhao@tencent.com>
>>>
>>> ---
>>>  configure            |   2 +-
>>>  libavcodec/libx264.c | 148 ++++++++++++++++++++++++++++++++++++-------
>>>  2 files changed, 125 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/configure b/configure
>>> index bbf1a70731..ef58c3fe97 100755
>>> --- a/configure
>>> +++ b/configure
>>> @@ -3491,7 +3491,7 @@ libwebp_encoder_deps="libwebp"
>>>  libwebp_anim_encoder_deps="libwebp"
>>>  libx262_encoder_deps="libx262"
>>>  libx264_encoder_deps="libx264"
>>> -libx264_encoder_select="atsc_a53"
>>> +libx264_encoder_select="atsc_a53 h264parse"
>>>  libx264rgb_encoder_deps="libx264"
>>>  libx264rgb_encoder_select="libx264_encoder"
>>>  libx265_encoder_deps="libx265"
>>> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
>>> index 10d646bd76..10dfda125d 100644
>>> --- a/libavcodec/libx264.c
>>> +++ b/libavcodec/libx264.c
>>> @@ -34,9 +34,12 @@
>>>  #include "avcodec.h"
>>>  #include "codec_internal.h"
>>>  #include "encode.h"
>>> +#include "get_bits.h"
>>> +#include "h264_ps.h"
>>>  #include "internal.h"
>>>  #include "packet_internal.h"
>>>  #include "atsc_a53.h"
>>> +#include "put_bits.h"
>>>  #include "sei.h"
>>>
>>>  #include <x264.h>
>>> @@ -865,6 +868,124 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
>>>      return 0;
>>>  }
>>>
>>> +static int set_avcc_extradata(AVCodecContext *avctx, x264_nal_t *nal, int nnal)
>>> +{
>>> +    x264_nal_t *sps_nal = NULL;
>>> +    x264_nal_t *pps_nal = NULL;
>>> +    GetBitContext gbc;
>>> +    PutBitContext pbc;
>>> +    H264ParamSets ps = { 0 };
>>> +    uint8_t *p;
>>> +    int ret, profile;
>>> +
>>> +    /* We know it's in the order of SPS/PPS/SEI, but it's not documented in x264 API.
>>> +     * The x264 param i_sps_id implies there is a single pair of SPS/PPS.
>>> +     */
>>> +    for (int i = 0; i < nnal; i++) {
>>> +        if (nal[i].i_type == NAL_SPS)
>>> +            sps_nal = &nal[i];
>>> +        else if (nal[i].i_type == NAL_PPS)
>>> +            pps_nal = &nal[i];
>>> +    }
>>> +    av_assert0(sps_nal);
>>> +    av_assert0(pps_nal);
>>
>> We should never assert on the output of external libraries.
> 
> Ok, will just remove the assert. I don't think a check is needed.
> 
>>
>>> +
>>> +    // +11 for AVCDecoderConfigurationRecord, will shrink to the real size finally
>>> +    avctx->extradata_size = sps_nal->i_payload + pps_nal->i_payload + 11;
>>> +    avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_MIN_SIZE);
>>
>> AV_INPUT_BUFFER_PADDING_SIZE
> 
> Oops.
> 
>>
>>> +    if (!avctx->extradata)
>>> +        return AVERROR(ENOMEM);
>>> +
>>> +    // Skip size part
>>> +    p = sps_nal->p_payload + 4;
>>> +    init_get_bits8(&gbc, p, sps_nal->i_payload - 4);
>>> +    skip_bits(&gbc, 8);
>>> +    ret = ff_h264_decode_seq_parameter_set(&gbc, avctx, &ps, 0);
>>
>> This is completely overblown. You only read three fields: profile_idc,
>> the constraint fields and level_idc. All three are at the start of the
>> SPS and occupy a whole byte, so there is no need for a GetBitContext at all.
>> (Yes, I am aware of the hypothetical scenario in which we could have a
>> 0x03 escape byte before level_idc (in case of a hypothetical future
>> profile zero), but even then using ff_h264_decode_seq_parameter_set() is
>> overblown.)
> 
> You missed chroma_format_idc, bit_depth_luma/bit_depth_chroma.
> I can get that from pixel format, but there are a lot of them.
> 

Ok, but even then I fail to see a convenience advantage here. After all,
using it e.g. forces the following loop onto you. And it forces cleanup
code here. Also, there is a gotcha here. See below.

>>
>>> +    if (ret < 0)
>>> +        return ret;
>>> +
>>> +    for (int i = 0; i < FF_ARRAY_ELEMS(ps.sps_list); i++) {
>>> +        if (ps.sps_list[i]) {
>>> +            ps.sps = ps.sps_list[i];
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    // Now create AVCDecoderConfigurationRecord
>>> +    init_put_bits(&pbc, avctx->extradata, avctx->extradata_size);
>>> +    put_bits(&pbc, 8, 1); // version
>>> +    put_bits(&pbc, 8, ps.sps->profile_idc); // AVCProfileIndication
>>> +    put_bits(&pbc, 8, ps.sps->constraint_set_flags); // profile_compatibility

Sure this is correct at all? It seems to me that h264_ps.c reverses the
order of the constraint_set_flags (constraint_set0_flag (which is the
0x80 bit in a file) will be mapped to 0x01 etc.) and it discards
reserved_zero_2bits. You should instead simply copy the relevant byte
from the SPS.

>>> +    put_bits(&pbc, 8, ps.sps->level_idc); // AVCLevelIndication
>>> +    put_bits(&pbc, 11, 0x7FF);
>>> +
>>> +    p = sps_nal->p_payload + 4;
>>> +    put_bits(&pbc, 5, 1); // numOfSequenceParameterSets
>>> +    put_bits(&pbc, 16, sps_nal->i_payload - 4); // sequenceParameterSetLength
>>
>> I also think that using a PutBitContext is overblown here (most fields
>> don't cross a byte-boundary, the bit-alignment of every field is fixed),
>> in particular if you copy the NALUs eight bits at a time:
> 
> It's readable, close related to the spec, and it counts how many bytes actually used
> automatically. I choose it for these convenience over performance.
> 

I regard a simple memcpy() as more convenient than the following loop:

>>
>>> +    for (int i = 0; i < sps_nal->i_payload - 4; i++)
>>> +        put_bits(&pbc, 8, p[i]);
>>> +
>>> +    p = pps_nal->p_payload + 4;
>>> +    put_bits(&pbc, 8, 1); // numOfPictureParameterSets
>>> +    put_bits(&pbc, 16, pps_nal->i_payload - 4); // pictureParameterSetLength
>>> +    for (int i = 0; i < pps_nal->i_payload - 4; i++)
>>> +        put_bits(&pbc, 8, p[i]);
>>> +
>>> +    profile = ps.sps->profile_idc;
>>> +    if (profile != 66 && profile != 77 && profile != 88) {
>>> +        put_bits(&pbc, 6, 0x3F);
>>> +        put_bits(&pbc, 2, ps.sps->chroma_format_idc);
>>> +        put_bits(&pbc, 5, 0x1F);
>>> +        put_bits(&pbc, 3, ps.sps->bit_depth_luma - 8);
>>> +        put_bits(&pbc, 5, 0x1F);
>>> +        put_bits(&pbc, 3, ps.sps->bit_depth_chroma - 8);
>>> +        put_bits(&pbc, 8, 0);
>>> +    }
>>> +    flush_put_bits(&pbc);
>>> +    avctx->extradata_size = put_bytes_output(&pbc);
>>> +
>>> +    ff_h264_ps_uninit(&ps);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int set_extradata(AVCodecContext *avctx)
>>> +{
>>> +    X264Context *x4 = avctx->priv_data;
>>> +    x264_nal_t *nal;
>>> +    uint8_t *p;
>>> +    int nnal, s;
>>> +
>>> +    s = x264_encoder_headers(x4->enc, &nal, &nnal);
>>> +    if (s < 0)
>>> +        return AVERROR_EXTERNAL;
>>> +
>>> +    if (!x4->params.b_annexb)
>>> +        return set_avcc_extradata(avctx, nal, nnal);
>>> +
>>> +    avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
>>> +    if (!p)
>>> +        return AVERROR(ENOMEM);
>>> +
>>> +    for (int i = 0; i < nnal; i++) {
>>> +        /* Don't put the SEI in extradata. */
>>> +        if (nal[i].i_type == NAL_SEI) {
>>> +            av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload + 25);
>>> +            x4->sei_size = nal[i].i_payload;
>>> +            x4->sei = av_malloc(x4->sei_size);
>>> +            if (!x4->sei)
>>> +                return AVERROR(ENOMEM);
>>> +            memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
>>> +            continue;
>>> +        }
>>> +        memcpy(p, nal[i].p_payload, nal[i].i_payload);
>>> +        p += nal[i].i_payload;
>>> +    }
>>> +    avctx->extradata_size = p - avctx->extradata;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>  #define PARSE_X264_OPT(name, var)\
>>>      if (x4->var && x264_param_parse(&x4->params, name, x4->var) < 0) {\
>>>          av_log(avctx, AV_LOG_ERROR, "Error parsing option '%s' with value '%s'.\n", name, x4->var);\
>>> @@ -1233,30 +1354,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
>>>          return AVERROR_EXTERNAL;
>>>
>>>      if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
>>> -        x264_nal_t *nal;
>>> -        uint8_t *p;
>>> -        int nnal, s, i;
>>> -
>>> -        s = x264_encoder_headers(x4->enc, &nal, &nnal);
>>> -        avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
>>> -        if (!p)
>>> -            return AVERROR(ENOMEM);
>>> -
>>> -        for (i = 0; i < nnal; i++) {
>>> -            /* Don't put the SEI in extradata. */
>>> -            if (nal[i].i_type == NAL_SEI) {
>>> -                av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload+25);
>>> -                x4->sei_size = nal[i].i_payload;
>>> -                x4->sei      = av_malloc(x4->sei_size);
>>> -                if (!x4->sei)
>>> -                    return AVERROR(ENOMEM);
>>> -                memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
>>> -                continue;
>>> -            }
>>> -            memcpy(p, nal[i].p_payload, nal[i].i_payload);
>>> -            p += nal[i].i_payload;
>>> -        }
>>> -        avctx->extradata_size = p - avctx->extradata;
>>> +        ret = set_extradata(avctx);
>>> +        if (ret < 0)
>>> +            return ret;
>>>      }
>>>
>>>      cpb_props = ff_encode_add_cpb_side_data(avctx);
>>
Zhao Zhili March 6, 2024, 4:46 p.m. UTC | #4
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Andreas Rheinhardt
> Sent: 2024年3月7日 0:24
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH] avcodec/libx264: fix extradata when config annexb=0
> 
> Zhao Zhili:
> >
> >
> >> -----Original Message-----
> >> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Andreas Rheinhardt
> >> Sent: 2024年3月6日 22:19
> >> To: ffmpeg-devel@ffmpeg.org
> >> Subject: Re: [FFmpeg-devel] [PATCH] avcodec/libx264: fix extradata when config annexb=0
> >>
> >> Zhao Zhili:
> >>> From: Zhao Zhili <zhilizhao@tencent.com>
> >>>
> >>> ---
> >>>  configure            |   2 +-
> >>>  libavcodec/libx264.c | 148 ++++++++++++++++++++++++++++++++++++-------
> >>>  2 files changed, 125 insertions(+), 25 deletions(-)
> >>>
> >>> diff --git a/configure b/configure
> >>> index bbf1a70731..ef58c3fe97 100755
> >>> --- a/configure
> >>> +++ b/configure
> >>> @@ -3491,7 +3491,7 @@ libwebp_encoder_deps="libwebp"
> >>>  libwebp_anim_encoder_deps="libwebp"
> >>>  libx262_encoder_deps="libx262"
> >>>  libx264_encoder_deps="libx264"
> >>> -libx264_encoder_select="atsc_a53"
> >>> +libx264_encoder_select="atsc_a53 h264parse"
> >>>  libx264rgb_encoder_deps="libx264"
> >>>  libx264rgb_encoder_select="libx264_encoder"
> >>>  libx265_encoder_deps="libx265"
> >>> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
> >>> index 10d646bd76..10dfda125d 100644
> >>> --- a/libavcodec/libx264.c
> >>> +++ b/libavcodec/libx264.c
> >>> @@ -34,9 +34,12 @@
> >>>  #include "avcodec.h"
> >>>  #include "codec_internal.h"
> >>>  #include "encode.h"
> >>> +#include "get_bits.h"
> >>> +#include "h264_ps.h"
> >>>  #include "internal.h"
> >>>  #include "packet_internal.h"
> >>>  #include "atsc_a53.h"
> >>> +#include "put_bits.h"
> >>>  #include "sei.h"
> >>>
> >>>  #include <x264.h>
> >>> @@ -865,6 +868,124 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
> >>>      return 0;
> >>>  }
> >>>
> >>> +static int set_avcc_extradata(AVCodecContext *avctx, x264_nal_t *nal, int nnal)
> >>> +{
> >>> +    x264_nal_t *sps_nal = NULL;
> >>> +    x264_nal_t *pps_nal = NULL;
> >>> +    GetBitContext gbc;
> >>> +    PutBitContext pbc;
> >>> +    H264ParamSets ps = { 0 };
> >>> +    uint8_t *p;
> >>> +    int ret, profile;
> >>> +
> >>> +    /* We know it's in the order of SPS/PPS/SEI, but it's not documented in x264 API.
> >>> +     * The x264 param i_sps_id implies there is a single pair of SPS/PPS.
> >>> +     */
> >>> +    for (int i = 0; i < nnal; i++) {
> >>> +        if (nal[i].i_type == NAL_SPS)
> >>> +            sps_nal = &nal[i];
> >>> +        else if (nal[i].i_type == NAL_PPS)
> >>> +            pps_nal = &nal[i];
> >>> +    }
> >>> +    av_assert0(sps_nal);
> >>> +    av_assert0(pps_nal);
> >>
> >> We should never assert on the output of external libraries.
> >
> > Ok, will just remove the assert. I don't think a check is needed.
> >
> >>
> >>> +
> >>> +    // +11 for AVCDecoderConfigurationRecord, will shrink to the real size finally
> >>> +    avctx->extradata_size = sps_nal->i_payload + pps_nal->i_payload + 11;
> >>> +    avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_MIN_SIZE);
> >>
> >> AV_INPUT_BUFFER_PADDING_SIZE
> >
> > Oops.
> >
> >>
> >>> +    if (!avctx->extradata)
> >>> +        return AVERROR(ENOMEM);
> >>> +
> >>> +    // Skip size part
> >>> +    p = sps_nal->p_payload + 4;
> >>> +    init_get_bits8(&gbc, p, sps_nal->i_payload - 4);
> >>> +    skip_bits(&gbc, 8);
> >>> +    ret = ff_h264_decode_seq_parameter_set(&gbc, avctx, &ps, 0);
> >>
> >> This is completely overblown. You only read three fields: profile_idc,
> >> the constraint fields and level_idc. All three are at the start of the
> >> SPS and occupy a whole byte, so there is no need for a GetBitContext at all.
> >> (Yes, I am aware of the hypothetical scenario in which we could have a
> >> 0x03 escape byte before level_idc (in case of a hypothetical future
> >> profile zero), but even then using ff_h264_decode_seq_parameter_set() is
> >> overblown.)
> >
> > You missed chroma_format_idc, bit_depth_luma/bit_depth_chroma.
> > I can get that from pixel format, but there are a lot of them.
> >
> 
> Ok, but even then I fail to see a convenience advantage here. After all,
> using it e.g. forces the following loop onto you. And it forces cleanup
> code here. Also, there is a gotcha here. See below.

I didn't use ff_h264_decode_seq_parameter_set at first, until met
chroma_format_idc. Get it from multiple pixel format doesn't like
the right way to go. Manually decode golomb is code duplication.

The following loop can be removed by using i_sps_id from x264
params. Loop over sps_list is more self-contained.

For hevc, it will be more complex and unlikely to avoid ff_hevc_decode_nal_.

> 
> >>
> >>> +    if (ret < 0)
> >>> +        return ret;
> >>> +
> >>> +    for (int i = 0; i < FF_ARRAY_ELEMS(ps.sps_list); i++) {
> >>> +        if (ps.sps_list[i]) {
> >>> +            ps.sps = ps.sps_list[i];
> >>> +            break;
> >>> +        }
> >>> +    }
> >>> +
> >>> +    // Now create AVCDecoderConfigurationRecord
> >>> +    init_put_bits(&pbc, avctx->extradata, avctx->extradata_size);
> >>> +    put_bits(&pbc, 8, 1); // version
> >>> +    put_bits(&pbc, 8, ps.sps->profile_idc); // AVCProfileIndication
> >>> +    put_bits(&pbc, 8, ps.sps->constraint_set_flags); // profile_compatibility
> 
> Sure this is correct at all? It seems to me that h264_ps.c reverses the
> order of the constraint_set_flags (constraint_set0_flag (which is the
> 0x80 bit in a file) will be mapped to 0x01 etc.) and it discards
> reserved_zero_2bits. You should instead simply copy the relevant byte
> from the SPS.
> 
> >>> +    put_bits(&pbc, 8, ps.sps->level_idc); // AVCLevelIndication
> >>> +    put_bits(&pbc, 11, 0x7FF);
> >>> +
> >>> +    p = sps_nal->p_payload + 4;
> >>> +    put_bits(&pbc, 5, 1); // numOfSequenceParameterSets
> >>> +    put_bits(&pbc, 16, sps_nal->i_payload - 4); // sequenceParameterSetLength
> >>
> >> I also think that using a PutBitContext is overblown here (most fields
> >> don't cross a byte-boundary, the bit-alignment of every field is fixed),
> >> in particular if you copy the NALUs eight bits at a time:
> >
> > It's readable, close related to the spec, and it counts how many bytes actually used
> > automatically. I choose it for these convenience over performance.
> >
> 
> I regard a simple memcpy() as more convenient than the following loop:
> 
> >>
> >>> +    for (int i = 0; i < sps_nal->i_payload - 4; i++)
> >>> +        put_bits(&pbc, 8, p[i]);
> >>> +
> >>> +    p = pps_nal->p_payload + 4;
> >>> +    put_bits(&pbc, 8, 1); // numOfPictureParameterSets
> >>> +    put_bits(&pbc, 16, pps_nal->i_payload - 4); // pictureParameterSetLength
> >>> +    for (int i = 0; i < pps_nal->i_payload - 4; i++)
> >>> +        put_bits(&pbc, 8, p[i]);
> >>> +
> >>> +    profile = ps.sps->profile_idc;
> >>> +    if (profile != 66 && profile != 77 && profile != 88) {
> >>> +        put_bits(&pbc, 6, 0x3F);
> >>> +        put_bits(&pbc, 2, ps.sps->chroma_format_idc);
> >>> +        put_bits(&pbc, 5, 0x1F);
> >>> +        put_bits(&pbc, 3, ps.sps->bit_depth_luma - 8);
> >>> +        put_bits(&pbc, 5, 0x1F);
> >>> +        put_bits(&pbc, 3, ps.sps->bit_depth_chroma - 8);
> >>> +        put_bits(&pbc, 8, 0);
> >>> +    }
> >>> +    flush_put_bits(&pbc);
> >>> +    avctx->extradata_size = put_bytes_output(&pbc);
> >>> +
> >>> +    ff_h264_ps_uninit(&ps);
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static int set_extradata(AVCodecContext *avctx)
> >>> +{
> >>> +    X264Context *x4 = avctx->priv_data;
> >>> +    x264_nal_t *nal;
> >>> +    uint8_t *p;
> >>> +    int nnal, s;
> >>> +
> >>> +    s = x264_encoder_headers(x4->enc, &nal, &nnal);
> >>> +    if (s < 0)
> >>> +        return AVERROR_EXTERNAL;
> >>> +
> >>> +    if (!x4->params.b_annexb)
> >>> +        return set_avcc_extradata(avctx, nal, nnal);
> >>> +
> >>> +    avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
> >>> +    if (!p)
> >>> +        return AVERROR(ENOMEM);
> >>> +
> >>> +    for (int i = 0; i < nnal; i++) {
> >>> +        /* Don't put the SEI in extradata. */
> >>> +        if (nal[i].i_type == NAL_SEI) {
> >>> +            av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload + 25);
> >>> +            x4->sei_size = nal[i].i_payload;
> >>> +            x4->sei = av_malloc(x4->sei_size);
> >>> +            if (!x4->sei)
> >>> +                return AVERROR(ENOMEM);
> >>> +            memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
> >>> +            continue;
> >>> +        }
> >>> +        memcpy(p, nal[i].p_payload, nal[i].i_payload);
> >>> +        p += nal[i].i_payload;
> >>> +    }
> >>> +    avctx->extradata_size = p - avctx->extradata;
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>>  #define PARSE_X264_OPT(name, var)\
> >>>      if (x4->var && x264_param_parse(&x4->params, name, x4->var) < 0) {\
> >>>          av_log(avctx, AV_LOG_ERROR, "Error parsing option '%s' with value '%s'.\n", name, x4->var);\
> >>> @@ -1233,30 +1354,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
> >>>          return AVERROR_EXTERNAL;
> >>>
> >>>      if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
> >>> -        x264_nal_t *nal;
> >>> -        uint8_t *p;
> >>> -        int nnal, s, i;
> >>> -
> >>> -        s = x264_encoder_headers(x4->enc, &nal, &nnal);
> >>> -        avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
> >>> -        if (!p)
> >>> -            return AVERROR(ENOMEM);
> >>> -
> >>> -        for (i = 0; i < nnal; i++) {
> >>> -            /* Don't put the SEI in extradata. */
> >>> -            if (nal[i].i_type == NAL_SEI) {
> >>> -                av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload+25);
> >>> -                x4->sei_size = nal[i].i_payload;
> >>> -                x4->sei      = av_malloc(x4->sei_size);
> >>> -                if (!x4->sei)
> >>> -                    return AVERROR(ENOMEM);
> >>> -                memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
> >>> -                continue;
> >>> -            }
> >>> -            memcpy(p, nal[i].p_payload, nal[i].i_payload);
> >>> -            p += nal[i].i_payload;
> >>> -        }
> >>> -        avctx->extradata_size = p - avctx->extradata;
> >>> +        ret = set_extradata(avctx);
> >>> +        if (ret < 0)
> >>> +            return ret;
> >>>      }
> >>>
> >>>      cpb_props = ff_encode_add_cpb_side_data(avctx);
> >>
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/configure b/configure
index bbf1a70731..ef58c3fe97 100755
--- a/configure
+++ b/configure
@@ -3491,7 +3491,7 @@  libwebp_encoder_deps="libwebp"
 libwebp_anim_encoder_deps="libwebp"
 libx262_encoder_deps="libx262"
 libx264_encoder_deps="libx264"
-libx264_encoder_select="atsc_a53"
+libx264_encoder_select="atsc_a53 h264parse"
 libx264rgb_encoder_deps="libx264"
 libx264rgb_encoder_select="libx264_encoder"
 libx265_encoder_deps="libx265"
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 10d646bd76..10dfda125d 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -34,9 +34,12 @@ 
 #include "avcodec.h"
 #include "codec_internal.h"
 #include "encode.h"
+#include "get_bits.h"
+#include "h264_ps.h"
 #include "internal.h"
 #include "packet_internal.h"
 #include "atsc_a53.h"
+#include "put_bits.h"
 #include "sei.h"
 
 #include <x264.h>
@@ -865,6 +868,124 @@  static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
     return 0;
 }
 
+static int set_avcc_extradata(AVCodecContext *avctx, x264_nal_t *nal, int nnal)
+{
+    x264_nal_t *sps_nal = NULL;
+    x264_nal_t *pps_nal = NULL;
+    GetBitContext gbc;
+    PutBitContext pbc;
+    H264ParamSets ps = { 0 };
+    uint8_t *p;
+    int ret, profile;
+
+    /* We know it's in the order of SPS/PPS/SEI, but it's not documented in x264 API.
+     * The x264 param i_sps_id implies there is a single pair of SPS/PPS.
+     */
+    for (int i = 0; i < nnal; i++) {
+        if (nal[i].i_type == NAL_SPS)
+            sps_nal = &nal[i];
+        else if (nal[i].i_type == NAL_PPS)
+            pps_nal = &nal[i];
+    }
+    av_assert0(sps_nal);
+    av_assert0(pps_nal);
+
+    // +11 for AVCDecoderConfigurationRecord, will shrink to the real size finally
+    avctx->extradata_size = sps_nal->i_payload + pps_nal->i_payload + 11;
+    avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_MIN_SIZE);
+    if (!avctx->extradata)
+        return AVERROR(ENOMEM);
+
+    // Skip size part
+    p = sps_nal->p_payload + 4;
+    init_get_bits8(&gbc, p, sps_nal->i_payload - 4);
+    skip_bits(&gbc, 8);
+    ret = ff_h264_decode_seq_parameter_set(&gbc, avctx, &ps, 0);
+    if (ret < 0)
+        return ret;
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(ps.sps_list); i++) {
+        if (ps.sps_list[i]) {
+            ps.sps = ps.sps_list[i];
+            break;
+        }
+    }
+
+    // Now create AVCDecoderConfigurationRecord
+    init_put_bits(&pbc, avctx->extradata, avctx->extradata_size);
+    put_bits(&pbc, 8, 1); // version
+    put_bits(&pbc, 8, ps.sps->profile_idc); // AVCProfileIndication
+    put_bits(&pbc, 8, ps.sps->constraint_set_flags); // profile_compatibility
+    put_bits(&pbc, 8, ps.sps->level_idc); // AVCLevelIndication
+    put_bits(&pbc, 11, 0x7FF);
+
+    p = sps_nal->p_payload + 4;
+    put_bits(&pbc, 5, 1); // numOfSequenceParameterSets
+    put_bits(&pbc, 16, sps_nal->i_payload - 4); // sequenceParameterSetLength
+    for (int i = 0; i < sps_nal->i_payload - 4; i++)
+        put_bits(&pbc, 8, p[i]);
+
+    p = pps_nal->p_payload + 4;
+    put_bits(&pbc, 8, 1); // numOfPictureParameterSets
+    put_bits(&pbc, 16, pps_nal->i_payload - 4); // pictureParameterSetLength
+    for (int i = 0; i < pps_nal->i_payload - 4; i++)
+        put_bits(&pbc, 8, p[i]);
+
+    profile = ps.sps->profile_idc;
+    if (profile != 66 && profile != 77 && profile != 88) {
+        put_bits(&pbc, 6, 0x3F);
+        put_bits(&pbc, 2, ps.sps->chroma_format_idc);
+        put_bits(&pbc, 5, 0x1F);
+        put_bits(&pbc, 3, ps.sps->bit_depth_luma - 8);
+        put_bits(&pbc, 5, 0x1F);
+        put_bits(&pbc, 3, ps.sps->bit_depth_chroma - 8);
+        put_bits(&pbc, 8, 0);
+    }
+    flush_put_bits(&pbc);
+    avctx->extradata_size = put_bytes_output(&pbc);
+
+    ff_h264_ps_uninit(&ps);
+
+    return 0;
+}
+
+static int set_extradata(AVCodecContext *avctx)
+{
+    X264Context *x4 = avctx->priv_data;
+    x264_nal_t *nal;
+    uint8_t *p;
+    int nnal, s;
+
+    s = x264_encoder_headers(x4->enc, &nal, &nnal);
+    if (s < 0)
+        return AVERROR_EXTERNAL;
+
+    if (!x4->params.b_annexb)
+        return set_avcc_extradata(avctx, nal, nnal);
+
+    avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!p)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < nnal; i++) {
+        /* Don't put the SEI in extradata. */
+        if (nal[i].i_type == NAL_SEI) {
+            av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload + 25);
+            x4->sei_size = nal[i].i_payload;
+            x4->sei = av_malloc(x4->sei_size);
+            if (!x4->sei)
+                return AVERROR(ENOMEM);
+            memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
+            continue;
+        }
+        memcpy(p, nal[i].p_payload, nal[i].i_payload);
+        p += nal[i].i_payload;
+    }
+    avctx->extradata_size = p - avctx->extradata;
+
+    return 0;
+}
+
 #define PARSE_X264_OPT(name, var)\
     if (x4->var && x264_param_parse(&x4->params, name, x4->var) < 0) {\
         av_log(avctx, AV_LOG_ERROR, "Error parsing option '%s' with value '%s'.\n", name, x4->var);\
@@ -1233,30 +1354,9 @@  FF_ENABLE_DEPRECATION_WARNINGS
         return AVERROR_EXTERNAL;
 
     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
-        x264_nal_t *nal;
-        uint8_t *p;
-        int nnal, s, i;
-
-        s = x264_encoder_headers(x4->enc, &nal, &nnal);
-        avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
-        if (!p)
-            return AVERROR(ENOMEM);
-
-        for (i = 0; i < nnal; i++) {
-            /* Don't put the SEI in extradata. */
-            if (nal[i].i_type == NAL_SEI) {
-                av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload+25);
-                x4->sei_size = nal[i].i_payload;
-                x4->sei      = av_malloc(x4->sei_size);
-                if (!x4->sei)
-                    return AVERROR(ENOMEM);
-                memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
-                continue;
-            }
-            memcpy(p, nal[i].p_payload, nal[i].i_payload);
-            p += nal[i].i_payload;
-        }
-        avctx->extradata_size = p - avctx->extradata;
+        ret = set_extradata(avctx);
+        if (ret < 0)
+            return ret;
     }
 
     cpb_props = ff_encode_add_cpb_side_data(avctx);