diff mbox series

[FFmpeg-devel,v5,04/10] avcodec: add MP4 to annexb support for H266/VVC

Message ID 20230103134040.41140-5-thomas.ff@spin-digital.com
State New
Headers show
Series Add support for H266/VVC | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Thomas Siedel Jan. 3, 2023, 1:40 p.m. UTC
Add bitstream filters for VVC: vvc_mp4toannexb, which converts MP4
(length-prefixed) NAL units to the Annex B byte stream format, and vvc_metadata.

Co-authored-by: Nuo Mi <nuomi2021@gmail.com>
---
 configure                        |   1 +
 libavcodec/Makefile              |   2 +
 libavcodec/bitstream_filters.c   |   2 +
 libavcodec/h266_metadata_bsf.c   | 146 ++++++++++++++
 libavcodec/vvc_mp4toannexb_bsf.c | 329 +++++++++++++++++++++++++++++++
 5 files changed, 480 insertions(+)
 create mode 100644 libavcodec/h266_metadata_bsf.c
 create mode 100644 libavcodec/vvc_mp4toannexb_bsf.c

Comments

Zhao Zhili Feb. 1, 2023, 3:34 p.m. UTC | #1
> On Jan 3, 2023, at 21:40, Thomas Siedel <thomas.ff@spin-digital.com> wrote:
> 
> Add parser for VVC MP4 to Annex B byte stream format.
> 
> Co-authored-by: Nuo Mi <nuomi2021@gmail.com>
> ---
> configure                        |   1 +
> libavcodec/Makefile              |   2 +
> libavcodec/bitstream_filters.c   |   2 +
> libavcodec/h266_metadata_bsf.c   | 146 ++++++++++++++
> libavcodec/vvc_mp4toannexb_bsf.c | 329 +++++++++++++++++++++++++++++++

Please add these bsfs in separate patches.

> 5 files changed, 480 insertions(+)
> create mode 100644 libavcodec/h266_metadata_bsf.c
> create mode 100644 libavcodec/vvc_mp4toannexb_bsf.c

I don’t know the reason behind the h265_metadata_bsf/hevc_mp4toannexb_bsf
naming split, but I would prefer to use the same prefix for both files unless
there is a technical reason not to. Such aliasing is annoying when browsing
and searching the source code.

> 
> diff --git a/configure b/configure
> index 2408dca0f5..776a972663 100755
> --- a/configure
> +++ b/configure
> @@ -3286,6 +3286,7 @@ mjpeg2jpeg_bsf_select="jpegtables"
> mpeg2_metadata_bsf_select="cbs_mpeg2"
> trace_headers_bsf_select="cbs"
> vp9_metadata_bsf_select="cbs_vp9"
> +vvc_metadata_bsf_select="cbs_h266"
> 
> # external libraries
> aac_at_decoder_deps="audiotoolbox"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 3e858b200b..2dee099f25 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1236,6 +1236,8 @@ OBJS-$(CONFIG_VP9_METADATA_BSF)           += vp9_metadata_bsf.o
> OBJS-$(CONFIG_VP9_RAW_REORDER_BSF)        += vp9_raw_reorder_bsf.o
> OBJS-$(CONFIG_VP9_SUPERFRAME_BSF)         += vp9_superframe_bsf.o
> OBJS-$(CONFIG_VP9_SUPERFRAME_SPLIT_BSF)   += vp9_superframe_split_bsf.o
> +OBJS-$(CONFIG_VVC_METADATA_BSF)           += h266_metadata_bsf.o
> +OBJS-$(CONFIG_VVC_MP4TOANNEXB_BSF)        += vvc_mp4toannexb_bsf.o
> 
> # thread libraries
> OBJS-$(HAVE_LIBC_MSVCRT)               += file_open.o
> diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
> index a3bebefe5f..403884f3d7 100644
> --- a/libavcodec/bitstream_filters.c
> +++ b/libavcodec/bitstream_filters.c
> @@ -64,6 +64,8 @@ extern const FFBitStreamFilter ff_vp9_metadata_bsf;
> extern const FFBitStreamFilter ff_vp9_raw_reorder_bsf;
> extern const FFBitStreamFilter ff_vp9_superframe_bsf;
> extern const FFBitStreamFilter ff_vp9_superframe_split_bsf;
> +extern const FFBitStreamFilter ff_vvc_mp4toannexb_bsf;
> +extern const FFBitStreamFilter ff_vvc_metadata_bsf;

Please sort these in alphabetical order.

> 
> #include "libavcodec/bsf_list.c"
> 
> diff --git a/libavcodec/h266_metadata_bsf.c b/libavcodec/h266_metadata_bsf.c
> new file mode 100644
> index 0000000000..f2bd2f31f3
> --- /dev/null
> +++ b/libavcodec/h266_metadata_bsf.c
> @@ -0,0 +1,146 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/common.h"
> +#include "libavutil/opt.h"
> +
> +#include "bsf.h"
> +#include "bsf_internal.h"
> +#include "cbs.h"
> +#include "cbs_bsf.h"
> +#include "cbs_h266.h"
> +#include "vvc.h"
> +
> +#define IS_SLICE(nut) (nut <= VVC_RASL_NUT || (nut >= VVC_IDR_W_RADL && nut <= VVC_GDR_NUT))
> +#define IS_PH(nut) (nut == VVC_PH_NUT)

These macros are duplicated in vvc_parser.c. How about adding a prefix and sharing them?

> +
> +typedef struct VVCMetadataContext {
> +    CBSBSFContext common;
> +
> +    H266RawAUD aud_nal;
> +
> +    int aud;
> +} VVCMetadataContext;
> +
> +static int h266_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt,
> +                                         CodedBitstreamFragment *pu)
> +{
> +    VVCMetadataContext *ctx = bsf->priv_data;
> +    int err, i;
> +
> +    // If an AUD is present, it must be the first NAL unit.
> +    if (pu->units[0].type == VVC_AUD_NUT) {
> +        if (ctx->aud == BSF_ELEMENT_REMOVE)
> +            ff_cbs_delete_unit(pu, 0);
> +    } else {
> +        if (ctx->aud == BSF_ELEMENT_INSERT) {

Should check pkt != NULL here.

`else if` can save one level of indentation.

> +            const H266RawSlice *first_slice = NULL;
> +            const H266RawPH *ph = NULL;
> +            H266RawAUD *aud = &ctx->aud_nal;
> +            int pic_type = 0, temporal_id = 8, layer_id = 0;
> +            for (i = 0; i < pu->nb_units; i++) {
> +                const H266RawNALUnitHeader *nal = pu->units[i].content;
> +                if (!nal)
> +                    continue;
> +                if (nal->nuh_temporal_id_plus1 < temporal_id + 1)
> +                    temporal_id = nal->nuh_temporal_id_plus1 - 1;
> +                if (IS_PH(nal->nal_unit_type)) {
> +                    ph = pu->units[i].content;
> +                } else if (IS_SLICE(nal->nal_unit_type)) {
> +                    const H266RawSlice *slice = pu->units[i].content;
> +                    layer_id = nal->nuh_layer_id;
> +                    if (slice->header.sh_slice_type == VVC_SLICE_TYPE_B &&
> +                        pic_type < 2)
> +                        pic_type = 2;
> +                    if (slice->header.sh_slice_type == VVC_SLICE_TYPE_P &&
> +                        pic_type < 1)
> +                        pic_type = 1;
> +                    if (!first_slice) {
> +                        first_slice = slice;
> +                        if (first_slice->header.
> +                            sh_picture_header_in_slice_header_flag)
> +                            ph = &first_slice->header.sh_picture_header;
> +                        else if (!ph)
> +                            break;
> +                    }
> +                }
> +            }
> +            if (!ph) {
> +                av_log(bsf, AV_LOG_ERROR, "no avaliable picture header");
> +                return AVERROR_INVALIDDATA;
> +            }
> +
> +            aud->nal_unit_header = (H266RawNALUnitHeader) {
> +                .nal_unit_type         = VVC_AUD_NUT,
> +                .nuh_layer_id          = layer_id,
> +                .nuh_temporal_id_plus1 = temporal_id + 1,
> +            };
> +            aud->aud_pic_type = pic_type;
> +            aud->aud_irap_or_gdr_flag = ph->ph_gdr_or_irap_pic_flag;
> +
> +            err = ff_cbs_insert_unit_content(pu, 0, VVC_AUD_NUT, aud, NULL);
> +            if (err < 0) {
> +                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
> +                return err;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +static const CBSBSFType h266_metadata_type = {
> +    .codec_id        = AV_CODEC_ID_VVC,
> +    .fragment_name   = "access unit",
> +    .unit_name       = "NAL unit",
> +    .update_fragment = &h266_metadata_update_fragment,
> +};
> +
> +static int vvc_metadata_init(AVBSFContext *bsf)
> +{
> +    return ff_cbs_bsf_generic_init(bsf, &h266_metadata_type);
> +}
> +
> +#define OFFSET(x) offsetof(VVCMetadataContext, x)
> +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
> +static const AVOption vvc_metadata_options[] = {
> +    BSF_ELEMENT_OPTIONS_PIR("aud", "Access Unit Delimiter NAL units",
> +                            aud, FLAGS),
> +
> +    { NULL }
> +};
> +
> +static const AVClass vvc_metadata_class = {
> +    .class_name = "vvc_metadata_bsf",
> +    .item_name  = av_default_item_name,
> +    .option     = vvc_metadata_options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const enum AVCodecID vvc_metadata_codec_ids[] = {
> +    AV_CODEC_ID_VVC, AV_CODEC_ID_NONE,
> +};
> +
> +const FFBitStreamFilter ff_vvc_metadata_bsf = {
> +    .p.name         = "vvc_metadata",
> +    .p.codec_ids    = vvc_metadata_codec_ids,
> +    .p.priv_class   = &vvc_metadata_class,
> +    .priv_data_size = sizeof(VVCMetadataContext),
> +    .init           = &vvc_metadata_init,
> +    .close          = &ff_cbs_bsf_generic_close,
> +    .filter         = &ff_cbs_bsf_generic_filter,
> +};

The functionality of vvc_metadata is very limited; I’d suggest adding
more features in a v2. That can be done after the other patches.

> diff --git a/libavcodec/vvc_mp4toannexb_bsf.c b/libavcodec/vvc_mp4toannexb_bsf.c
> new file mode 100644
> index 0000000000..bdf40b531f
> --- /dev/null
> +++ b/libavcodec/vvc_mp4toannexb_bsf.c
> @@ -0,0 +1,329 @@
> +/*
> + * VVC MP4 to Annex B byte stream format filter
> + * Copyright (c) 2022, Thomas Siedel
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <string.h>
> +
> +#include "libavutil/intreadwrite.h"
> +#include "libavutil/mem.h"
> +
> +#include "avcodec.h"
> +#include "bsf.h"
> +#include "bsf_internal.h"
> +#include "bytestream.h"
> +#include "vvc.h"
> +
> +#include "libavcodec/get_bits.h"
> +
> +#define MIN_VVCC_LENGTH 23
> +
> +typedef struct VVCBSFContext {
> +    uint8_t length_size;
> +    int extradata_parsed;
> +} VVCBSFContext;
> +
> +static int vvc_extradata_to_annexb(AVBSFContext *ctx)
> +{
> +    GetByteContext gb;
> +    int length_size, num_arrays, i, j;
> +    int ret = 0;
> +    int temp = 0;
> +    int ptl_present;
> +
> +    uint8_t *new_extradata = NULL;
> +    size_t new_extradata_size = 0;
> +
> +    int max_picture_width = 0;
> +    int max_picture_height = 0;
> +    int avg_frame_rate = 0;
> +
> +    bytestream2_init(&gb, ctx->par_in->extradata, ctx->par_in->extradata_size);
> +    temp = bytestream2_get_byte(&gb);
> +    length_size = ((temp & 6) >> 1) + 1;
> +    ptl_present = temp & 1;
> +    if (ptl_present) {
> +        int num_bytes_constraint_info;
> +        int general_profile_idc;
> +        int general_tier_flag;
> +        int general_level_idc;
> +        int ptl_frame_only_constraint_flag;
> +        int ptl_multi_layer_enabled_flag;
> +        int ptl_num_sub_profiles;
> +        int temp3, temp4, temp5;
> +        int temp2 = bytestream2_get_be16(&gb);
> +        int ols_idx = (temp2 >> 7) & 0x1ff;
> +        int num_sublayers = (temp2 >> 4) & 0x7;
> +        int constant_frame_rate = (temp2 >> 2) & 0x3;
> +        int chroma_format_idc = temp2 & 0x3;
> +        int bit_depth_minus8 = (bytestream2_get_byte(&gb) >> 5) & 0x7;
> +        av_log(ctx, AV_LOG_DEBUG,
> +               "bit_depth_minus8 %d chroma_format_idc %d\n", bit_depth_minus8,
> +               chroma_format_idc);
> +        av_log(ctx, AV_LOG_DEBUG, "constant_frame_rate %d, ols_idx %d\n",
> +               constant_frame_rate, ols_idx);
> +        // VvcPTLRecord(num_sublayers) native_ptl
> +        temp3 = bytestream2_get_byte(&gb);
> +        num_bytes_constraint_info = (temp3) & 0x3f;
> +        temp4 = bytestream2_get_byte(&gb);
> +        general_profile_idc = (temp4 >> 1) & 0x7f;
> +        general_tier_flag = (temp4) & 1;
> +        general_level_idc = bytestream2_get_byte(&gb);
> +        av_log(ctx, AV_LOG_DEBUG,
> +               "general_profile_idc %d, general_tier_flag %d, general_level_idc %d, num_sublayers %d num_bytes_constraint_info %d\n",
> +               general_profile_idc, general_tier_flag, general_level_idc,
> +               num_sublayers, num_bytes_constraint_info);
> +
> +        temp5 = bytestream2_get_byte(&gb);
> +        ptl_frame_only_constraint_flag = (temp5 >> 7) & 0x1;
> +        ptl_multi_layer_enabled_flag   = (temp5 >> 6) & 0x1;
> +        for (i = 0; i < num_bytes_constraint_info - 1; i++) {
> +            // unsigned int(8*num_bytes_constraint_info - 2) general_constraint_info;
> +            bytestream2_get_byte(&gb);
> +        }
> +
> +        av_log(ctx, AV_LOG_DEBUG,
> +               "ptl_multi_layer_enabled_flag %d, ptl_frame_only_constraint_flag %d\n",
> +               ptl_multi_layer_enabled_flag, ptl_frame_only_constraint_flag);
> +
> +        if (num_sublayers > 1) {
> +            int temp6 = bytestream2_get_byte(&gb);
> +            uint8_t ptl_sublayer_level_present_flag[8] = { 0 };
> +            //uint8_t sublayer_level_idc[8] = {0};
> +            for (i = num_sublayers - 2; i >= 0; i--) {
> +                ptl_sublayer_level_present_flag[i] =
> +                    (temp6 >> (7 - (num_sublayers - 2 - i))) & 0x01;
> +            }
> +            // for (j=num_sublayers; j<=8 && num_sublayers > 1; j++)
> +            //     bit(1) ptl_reserved_zero_bit = 0;
> +            for (i = num_sublayers - 2; i >= 0; i--) {
> +                if (ptl_sublayer_level_present_flag[i]) {
> +                    //sublayer_level_idc[i] = bytestream2_get_byte(&gb);
> +                }
> +            }
> +        }
> +
> +        ptl_num_sub_profiles = bytestream2_get_byte(&gb);
> +        for (j = 0; j < ptl_num_sub_profiles; j++) {
> +            // unsigned int(32) general_sub_profile_idc[j];
> +            bytestream2_get_be16(&gb);
> +            bytestream2_get_be16(&gb);
> +        }
> +
> +        max_picture_width = bytestream2_get_be16(&gb);  // unsigned_int(16) max_picture_width;
> +        max_picture_height = bytestream2_get_be16(&gb); // unsigned_int(16) max_picture_height;
> +        avg_frame_rate = bytestream2_get_be16(&gb);     // unsigned int(16) avg_frame_rate; }
> +        av_log(ctx, AV_LOG_DEBUG,
> +               "max_picture_width %d, max_picture_height %d, avg_frame_rate %d\n",
> +               max_picture_width, max_picture_height, avg_frame_rate);
> +    }
> +
> +    num_arrays = bytestream2_get_byte(&gb);
> +
> +    for (i = 0; i < num_arrays; i++) {
> +        int cnt;
> +        int type = bytestream2_get_byte(&gb) & 0x1f;
> +
> +        if (type == VVC_OPI_NUT || type == VVC_DCI_NUT)
> +            cnt = 1;
> +        else
> +            cnt = bytestream2_get_be16(&gb);
> +
> +        av_log(ctx, AV_LOG_DEBUG, "nalu_type %d cnt %d\n", type, cnt);
> +
> +        if (!(type == VVC_OPI_NUT || type == VVC_DCI_NUT ||
> +              type == VVC_VPS_NUT || type == VVC_SPS_NUT || type == VVC_PPS_NUT
> +              || type == VVC_PREFIX_SEI_NUT || type == VVC_SUFFIX_SEI_NUT)) {
> +            av_log(ctx, AV_LOG_ERROR,
> +                   "Invalid NAL unit type in extradata: %d\n", type);
> +            ret = AVERROR_INVALIDDATA;
> +            goto fail;
> +        }
> +
> +        for (j = 0; j < cnt; j++) {
> +            int nalu_len = bytestream2_get_be16(&gb);
> +
> +            if (4 + AV_INPUT_BUFFER_PADDING_SIZE + nalu_len >
> +                SIZE_MAX - new_extradata_size) {
> +                ret = AVERROR_INVALIDDATA;
> +                goto fail;
> +            }
> +            ret = av_reallocp(&new_extradata, new_extradata_size + nalu_len + 4
> +                              + AV_INPUT_BUFFER_PADDING_SIZE);
> +            if (ret < 0)
> +                goto fail;
> +
> +            AV_WB32(new_extradata + new_extradata_size, 1); // add the startcode
> +            bytestream2_get_buffer(&gb, new_extradata + new_extradata_size + 4,
> +                                   nalu_len);
> +            new_extradata_size += 4 + nalu_len;
> +            memset(new_extradata + new_extradata_size, 0,
> +                   AV_INPUT_BUFFER_PADDING_SIZE);
> +        }
> +    }
> +
> +    av_freep(&ctx->par_out->extradata);
> +    ctx->par_out->extradata = new_extradata;
> +    ctx->par_out->extradata_size = new_extradata_size;
> +
> +    if (!new_extradata_size)
> +        av_log(ctx, AV_LOG_WARNING, "No parameter sets in the extradata\n");
> +
> +    return length_size;
> +  fail:
> +    av_freep(&new_extradata);
> +    return ret;
> +}
> +
> +static int vvc_mp4toannexb_init(AVBSFContext *ctx)
> +{
> +    VVCBSFContext *s = ctx->priv_data;
> +    int ret;
> +
> +    if (ctx->par_in->extradata_size < MIN_VVCC_LENGTH ||
> +        AV_RB24(ctx->par_in->extradata) == 1 ||
> +        AV_RB32(ctx->par_in->extradata) == 1) {
> +        av_log(ctx, AV_LOG_VERBOSE,
> +               "The input looks like it is Annex B already\n");
> +    } else {
> +        ret = vvc_extradata_to_annexb(ctx);
> +        if (ret < 0)
> +            return ret;
> +        s->length_size = ret;
> +        s->extradata_parsed = 1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vvc_mp4toannexb_filter(AVBSFContext *ctx, AVPacket *out)
> +{
> +    VVCBSFContext *s = ctx->priv_data;
> +    AVPacket *in;
> +    GetByteContext gb;
> +
> +    int is_irap = 0;
> +    int added_extra = 0;
> +    int i, ret = 0;
> +
> +    ret = ff_bsf_get_packet(ctx, &in);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (!s->extradata_parsed) {
> +        av_packet_move_ref(out, in);
> +        av_packet_free(&in);
> +        return 0;
> +    }
> +
> +    bytestream2_init(&gb, in->data, in->size);
> +
> +    /* check if this packet contains an IRAP. The extradata will need to be added before any potential PH_NUT */
> +    while (bytestream2_get_bytes_left(&gb)) {
> +        uint32_t nalu_size = 0;
> +        int nalu_type;
> +
> +        if (bytestream2_get_bytes_left(&gb) < s->length_size) {
> +            ret = AVERROR_INVALIDDATA;
> +            goto fail;
> +        }
> +
> +        for (i = 0; i < s->length_size; i++)
> +            nalu_size = (nalu_size << 8) | bytestream2_get_byte(&gb);
> +
> +        if (nalu_size < 2 || nalu_size > bytestream2_get_bytes_left(&gb)) {
> +            ret = AVERROR_INVALIDDATA;
> +            goto fail;
> +        }
> +
> +        nalu_type = (bytestream2_peek_be16(&gb) >> 3) & 0x1f;
> +        is_irap = nalu_type >= VVC_IDR_W_RADL && nalu_type <= VVC_RSV_IRAP_11;
> +        if (is_irap) {
> +            break;
> +        }
> +        bytestream2_seek(&gb, nalu_size, SEEK_CUR);
> +    }
> +
> +    bytestream2_seek(&gb, 0, SEEK_SET);
> +    while (bytestream2_get_bytes_left(&gb)) {
> +        uint32_t nalu_size = 0;
> +        int nalu_type;
> +        int add_extradata, extra_size, prev_size;
> +
> +        if (bytestream2_get_bytes_left(&gb) < s->length_size) {
> +            ret = AVERROR_INVALIDDATA;
> +            goto fail;
> +        }
> +
> +        for (i = 0; i < s->length_size; i++)
> +            nalu_size = (nalu_size << 8) | bytestream2_get_byte(&gb);
> +
> +        if (nalu_size < 2 || nalu_size > bytestream2_get_bytes_left(&gb)) {
> +            ret = AVERROR_INVALIDDATA;
> +            goto fail;
> +        }
> +
> +        nalu_type = (bytestream2_peek_be16(&gb) >> 3) & 0x1f;
> +
> +        /* prepend extradata to IRAP frames */
> +        add_extradata = is_irap && nalu_type != VVC_AUD_NUT && !added_extra;
> +        extra_size = add_extradata * ctx->par_out->extradata_size;
> +        added_extra |= add_extradata;
> +
> +        if (FFMIN(INT_MAX, SIZE_MAX) < 4ULL + nalu_size + extra_size) {
> +            ret = AVERROR_INVALIDDATA;
> +            goto fail;
> +        }
> +
> +        prev_size = out->size;
> +
> +        ret = av_grow_packet(out, 4 + nalu_size + extra_size);
> +        if (ret < 0)
> +            goto fail;
> +
> +        if (extra_size)
> +            memcpy(out->data + prev_size, ctx->par_out->extradata, extra_size);
> +        AV_WB32(out->data + prev_size + extra_size, 1);
> +        bytestream2_get_buffer(&gb, out->data + prev_size + 4 + extra_size,
> +                               nalu_size);
> +    }
> +
> +    ret = av_packet_copy_props(out, in);
> +    if (ret < 0)
> +        goto fail;
> +
> +  fail:
> +    if (ret < 0)
> +        av_packet_unref(out);
> +    av_packet_free(&in);
> +
> +    return ret;
> +}
> +
> +static const enum AVCodecID codec_ids[] = {
> +    AV_CODEC_ID_VVC, AV_CODEC_ID_NONE,
> +};
> +
> +const FFBitStreamFilter ff_vvc_mp4toannexb_bsf = {
> +    .p.name         = "vvc_mp4toannexb",
> +    .p.codec_ids    = codec_ids,
> +    .priv_data_size = sizeof(VVCBSFContext),
> +    .init           = vvc_mp4toannexb_init,
> +    .filter         = vvc_mp4toannexb_filter,
> +};
> -- 
> 2.25.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Thomas Siedel Feb. 10, 2023, 6:40 p.m. UTC | #2
On Wed, 1 Feb 2023 at 16:34, zhilizhao(赵志立) <quinkblack@foxmail.com> wrote:

>
>
> > On Jan 3, 2023, at 21:40, Thomas Siedel <thomas.ff@spin-digital.com>
> wrote:
> >
> > Add parser for VVC MP4 to Annex B byte stream format.
> >
> > Co-authored-by: Nuo Mi <nuomi2021@gmail.com>
> > ---
> > configure                        |   1 +
> > libavcodec/Makefile              |   2 +
> > libavcodec/bitstream_filters.c   |   2 +
> > libavcodec/h266_metadata_bsf.c   | 146 ++++++++++++++
> > libavcodec/vvc_mp4toannexb_bsf.c | 329 +++++++++++++++++++++++++++++++
>
> Please add these bsfs in separate patches.
>

Thank you for your feedback.
I separated them now in the new patch set version 6.


> > 5 files changed, 480 insertions(+)
> > create mode 100644 libavcodec/h266_metadata_bsf.c
> > create mode 100644 libavcodec/vvc_mp4toannexb_bsf.c
>
> I don’t know the reason behind the h265_metadata_bsf/hevc_mp4toannexb_bsf
> naming split, but I would prefer to use the same prefix for both files unless
> there is a technical reason not to. Such aliasing is annoying when browsing
> and searching the source code.
>

I used the naming of the HEVC implementation as a guide.
But yes, I agree that this kind of mix might be problematic.
In the new patch set, I have decided on the ITU naming and renamed all the
files and functions from the vvc prefix to the h266 prefix.
With this, they are now also alphabetically close to h264 and hevc.
However, I did not rename the VVC_* enum types, because this would change a
lot of code, and I am worried that it would make things more complicated
when comparing different versions of the code, e.g., for review.

What do you, and the other developers, think about this?
Which naming pattern would you prefer? Should the enums also be renamed?


> > --- a/libavcodec/bitstream_filters.c
> > +++ b/libavcodec/bitstream_filters.c
> > @@ -64,6 +64,8 @@ extern const FFBitStreamFilter ff_vp9_metadata_bsf;
> > extern const FFBitStreamFilter ff_vp9_raw_reorder_bsf;
> > extern const FFBitStreamFilter ff_vp9_superframe_bsf;
> > extern const FFBitStreamFilter ff_vp9_superframe_split_bsf;
> > +extern const FFBitStreamFilter ff_vvc_mp4toannexb_bsf;
> > +extern const FFBitStreamFilter ff_vvc_metadata_bsf;
>
> Please sort these in alphabetical order.
>

OK, this is done now in the new patch set version.


>
> >
> > #include "libavcodec/bsf_list.c"
> >
> > diff --git a/libavcodec/h266_metadata_bsf.c
> b/libavcodec/h266_metadata_bsf.c
> > new file mode 100644
> > index 0000000000..f2bd2f31f3
> > --- /dev/null
> > +++ b/libavcodec/h266_metadata_bsf.c
> > @@ -0,0 +1,146 @@
> > +/*
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#include "libavutil/common.h"
> > +#include "libavutil/opt.h"
> > +
> > +#include "bsf.h"
> > +#include "bsf_internal.h"
> > +#include "cbs.h"
> > +#include "cbs_bsf.h"
> > +#include "cbs_h266.h"
> > +#include "vvc.h"
> > +
> > +#define IS_SLICE(nut) (nut <= VVC_RASL_NUT || (nut >= VVC_IDR_W_RADL &&
> nut <= VVC_GDR_NUT))
> > +#define IS_PH(nut) (nut == VVC_PH_NUT)
>
> These macros are duplicated in vvc_parser.c. How about adding a prefix and
> sharing them?
>

Thank you for the suggestion.
I did it now in the new patch set version.


>
> > +
> > +typedef struct VVCMetadataContext {
> > +    CBSBSFContext common;
> > +
> > +    H266RawAUD aud_nal;
> > +
> > +    int aud;
> > +} VVCMetadataContext;
> > +
> > +static int h266_metadata_update_fragment(AVBSFContext *bsf, AVPacket
> *pkt,
> > +                                         CodedBitstreamFragment *pu)
> > +{
> > +    VVCMetadataContext *ctx = bsf->priv_data;
> > +    int err, i;
> > +
> > +    // If an AUD is present, it must be the first NAL unit.
> > +    if (pu->units[0].type == VVC_AUD_NUT) {
> > +        if (ctx->aud == BSF_ELEMENT_REMOVE)
> > +            ff_cbs_delete_unit(pu, 0);
> > +    } else {
> > +        if (ctx->aud == BSF_ELEMENT_INSERT) {
>
> Should check pkt != NULL here.
>
> `else if` can save one level of indentation.
>

OK, this is done now in the new patch set version.


> > +
> > +const FFBitStreamFilter ff_vvc_metadata_bsf = {
> > +    .p.name         = "vvc_metadata",
> > +    .p.codec_ids    = vvc_metadata_codec_ids,
> > +    .p.priv_class   = &vvc_metadata_class,
> > +    .priv_data_size = sizeof(VVCMetadataContext),
> > +    .init           = &vvc_metadata_init,
> > +    .close          = &ff_cbs_bsf_generic_close,
> > +    .filter         = &ff_cbs_bsf_generic_filter,
> > +};
>
> The functionality of vvc_metadata is very limited; I’d suggest adding
> more features in a v2. That can be done after the other patches.
>
>
Indeed, currently only AUD insertion/removal is implemented.
More metadata options should be added after the first version is merged.
diff mbox series

Patch

diff --git a/configure b/configure
index 2408dca0f5..776a972663 100755
--- a/configure
+++ b/configure
@@ -3286,6 +3286,7 @@  mjpeg2jpeg_bsf_select="jpegtables"
 mpeg2_metadata_bsf_select="cbs_mpeg2"
 trace_headers_bsf_select="cbs"
 vp9_metadata_bsf_select="cbs_vp9"
+vvc_metadata_bsf_select="cbs_h266"
 
 # external libraries
 aac_at_decoder_deps="audiotoolbox"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3e858b200b..2dee099f25 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1236,6 +1236,8 @@  OBJS-$(CONFIG_VP9_METADATA_BSF)           += vp9_metadata_bsf.o
 OBJS-$(CONFIG_VP9_RAW_REORDER_BSF)        += vp9_raw_reorder_bsf.o
 OBJS-$(CONFIG_VP9_SUPERFRAME_BSF)         += vp9_superframe_bsf.o
 OBJS-$(CONFIG_VP9_SUPERFRAME_SPLIT_BSF)   += vp9_superframe_split_bsf.o
+OBJS-$(CONFIG_VVC_METADATA_BSF)           += h266_metadata_bsf.o
+OBJS-$(CONFIG_VVC_MP4TOANNEXB_BSF)        += vvc_mp4toannexb_bsf.o
 
 # thread libraries
 OBJS-$(HAVE_LIBC_MSVCRT)               += file_open.o
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index a3bebefe5f..403884f3d7 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -64,6 +64,8 @@  extern const FFBitStreamFilter ff_vp9_metadata_bsf;
 extern const FFBitStreamFilter ff_vp9_raw_reorder_bsf;
 extern const FFBitStreamFilter ff_vp9_superframe_bsf;
 extern const FFBitStreamFilter ff_vp9_superframe_split_bsf;
+extern const FFBitStreamFilter ff_vvc_mp4toannexb_bsf;
+extern const FFBitStreamFilter ff_vvc_metadata_bsf;
 
 #include "libavcodec/bsf_list.c"
 
diff --git a/libavcodec/h266_metadata_bsf.c b/libavcodec/h266_metadata_bsf.c
new file mode 100644
index 0000000000..f2bd2f31f3
--- /dev/null
+++ b/libavcodec/h266_metadata_bsf.c
@@ -0,0 +1,146 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "cbs.h"
+#include "cbs_bsf.h"
+#include "cbs_h266.h"
+#include "vvc.h"
+
+/* True for the NAL unit types this filter treats as slices: everything up to
+ * VVC_RASL_NUT, plus the range VVC_IDR_W_RADL..VVC_GDR_NUT. The argument is
+ * parenthesized so that any expression can be passed safely. */
+#define IS_SLICE(nut) ((nut) <= VVC_RASL_NUT || ((nut) >= VVC_IDR_W_RADL && (nut) <= VVC_GDR_NUT))
+/* True for the picture header NAL unit type. */
+#define IS_PH(nut) ((nut) == VVC_PH_NUT)
+
+/* Private context of the vvc_metadata bitstream filter. */
+typedef struct VVCMetadataContext {
+    // State of the generic CBS bitstream-filter helpers.
+    // NOTE(review): presumably has to remain the first member so the generic
+    // helpers can use priv_data directly - confirm against cbs_bsf.h.
+    CBSBSFContext common;
+
+    // AUD that h266_metadata_update_fragment() fills in and inserts.
+    H266RawAUD aud_nal;
+
+    // Value of the "aud" option; compared with BSF_ELEMENT_INSERT and
+    // BSF_ELEMENT_REMOVE in h266_metadata_update_fragment().
+    int aud;
+} VVCMetadataContext;
+
+/**
+ * Remove or insert the access unit delimiter (AUD) of one fragment, as
+ * selected by the "aud" option.
+ *
+ * - BSF_ELEMENT_REMOVE: an AUD found as the first unit is deleted.
+ * - BSF_ELEMENT_INSERT: if the first unit is not an AUD, a new one is built
+ *   and inserted at position 0. Its temporal id is the lowest one found in
+ *   the fragment, its layer id comes from the last slice visited, its
+ *   picture type is derived from the slice types seen (B > P > other) and
+ *   its IRAP/GDR flag is copied from the picture header.
+ *
+ * @param bsf bitstream filter context (priv_data is a VVCMetadataContext)
+ * @param pkt packet the fragment was read from. NOTE(review): the generic CBS
+ *            BSF code is expected to pass NULL here when the fragment is the
+ *            extradata (h265_metadata relies on the same convention) -
+ *            confirm; no AUD is inserted in that case.
+ * @param pu  fragment to modify in place
+ * @return 0 on success, AVERROR_INVALIDDATA if an AUD has to be inserted but
+ *         no picture header is available, or the error returned by
+ *         ff_cbs_insert_unit_content()
+ */
+static int h266_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt,
+                                         CodedBitstreamFragment *pu)
+{
+    VVCMetadataContext *ctx = bsf->priv_data;
+    int err, i;
+
+    // If an AUD is present, it must be the first NAL unit.
+    // nb_units is tested first so that an empty fragment never reads units[0].
+    if (pu->nb_units && pu->units[0].type == VVC_AUD_NUT) {
+        if (ctx->aud == BSF_ELEMENT_REMOVE)
+            ff_cbs_delete_unit(pu, 0);
+    } else if (pkt && ctx->aud == BSF_ELEMENT_INSERT) {
+        const H266RawSlice *first_slice = NULL;
+        const H266RawPH *ph = NULL;
+        H266RawAUD *aud = &ctx->aud_nal;
+        // temporal_id starts above every value the loop can store, so the
+        // first unit with content always lowers it.
+        int pic_type = 0, temporal_id = 8, layer_id = 0;
+        for (i = 0; i < pu->nb_units; i++) {
+            const H266RawNALUnitHeader *nal = pu->units[i].content;
+            if (!nal)
+                continue;
+            if (nal->nuh_temporal_id_plus1 < temporal_id + 1)
+                temporal_id = nal->nuh_temporal_id_plus1 - 1;
+            if (IS_PH(nal->nal_unit_type)) {
+                ph = pu->units[i].content;
+            } else if (IS_SLICE(nal->nal_unit_type)) {
+                const H266RawSlice *slice = pu->units[i].content;
+                layer_id = nal->nuh_layer_id;
+                if (slice->header.sh_slice_type == VVC_SLICE_TYPE_B &&
+                    pic_type < 2)
+                    pic_type = 2;
+                if (slice->header.sh_slice_type == VVC_SLICE_TYPE_P &&
+                    pic_type < 1)
+                    pic_type = 1;
+                if (!first_slice) {
+                    first_slice = slice;
+                    // The picture header is either embedded in the first
+                    // slice header or must have been seen before it.
+                    if (first_slice->header.sh_picture_header_in_slice_header_flag)
+                        ph = &first_slice->header.sh_picture_header;
+                    else if (!ph)
+                        break;
+                }
+            }
+        }
+        if (!ph) {
+            av_log(bsf, AV_LOG_ERROR, "No available picture header.\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        aud->nal_unit_header = (H266RawNALUnitHeader) {
+            .nal_unit_type         = VVC_AUD_NUT,
+            .nuh_layer_id          = layer_id,
+            .nuh_temporal_id_plus1 = temporal_id + 1,
+        };
+        aud->aud_pic_type = pic_type;
+        aud->aud_irap_or_gdr_flag = ph->ph_gdr_or_irap_pic_flag;
+
+        err = ff_cbs_insert_unit_content(pu, 0, VVC_AUD_NUT, aud, NULL);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
+            return err;
+        }
+    }
+    return 0;
+}
+
+/* Description handed to ff_cbs_bsf_generic_init(): VVC streams, with
+ * h266_metadata_update_fragment() as the per-fragment callback. The two
+ * name strings are presumably used in log messages - confirm in cbs_bsf.c. */
+static const CBSBSFType h266_metadata_type = {
+    .codec_id        = AV_CODEC_ID_VVC,
+    .fragment_name   = "access unit",
+    .unit_name       = "NAL unit",
+    .update_fragment = &h266_metadata_update_fragment,
+};
+
+/* Filter init callback: delegates to the generic CBS BSF initialisation with
+ * the VVC-specific description h266_metadata_type and returns its result. */
+static int vvc_metadata_init(AVBSFContext *bsf)
+{
+    return ff_cbs_bsf_generic_init(bsf, &h266_metadata_type);
+}
+
+/* OFFSET and FLAGS are helpers for the option table below.
+ * NOTE(review): OFFSET is not referenced directly here; presumably
+ * BSF_ELEMENT_OPTIONS_PIR expands to entries that use it - confirm. */
+#define OFFSET(x) offsetof(VVCMetadataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+/* Options of the vvc_metadata filter: only "aud", backed by
+ * VVCMetadataContext.aud. The table is terminated by a NULL entry. */
+static const AVOption vvc_metadata_options[] = {
+    BSF_ELEMENT_OPTIONS_PIR("aud", "Access Unit Delimiter NAL units",
+                            aud, FLAGS),
+
+    { NULL }
+};
+
+/* AVClass of the filter's private context; exposes vvc_metadata_options. */
+static const AVClass vvc_metadata_class = {
+    .class_name = "vvc_metadata_bsf",
+    .item_name  = av_default_item_name,
+    .option     = vvc_metadata_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/* Codec IDs listed for the vvc_metadata filter: VVC only, with
+ * AV_CODEC_ID_NONE as the final entry. */
+static const enum AVCodecID vvc_metadata_codec_ids[] = {
+    AV_CODEC_ID_VVC, AV_CODEC_ID_NONE,
+};
+
+/* Definition of the "vvc_metadata" bitstream filter. Only init is specific
+ * to this file; close and filter are the generic CBS BSF callbacks. */
+const FFBitStreamFilter ff_vvc_metadata_bsf = {
+    .p.name         = "vvc_metadata",
+    .p.codec_ids    = vvc_metadata_codec_ids,
+    .p.priv_class   = &vvc_metadata_class,
+    .priv_data_size = sizeof(VVCMetadataContext),
+    .init           = &vvc_metadata_init,
+    .close          = &ff_cbs_bsf_generic_close,
+    .filter         = &ff_cbs_bsf_generic_filter,
+};
diff --git a/libavcodec/vvc_mp4toannexb_bsf.c b/libavcodec/vvc_mp4toannexb_bsf.c
new file mode 100644
index 0000000000..bdf40b531f
--- /dev/null
+++ b/libavcodec/vvc_mp4toannexb_bsf.c
@@ -0,0 +1,329 @@ 
+/*
+ * VVC MP4 to Annex B byte stream format filter
+ * Copyright (c) 2022, Thomas Siedel
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "avcodec.h"
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "bytestream.h"
+#include "vvc.h"
+
+#include "libavcodec/get_bits.h"
+
+/* Extradata shorter than this many bytes is not parsed as a configuration
+ * record; vvc_mp4toannexb_init() then treats the input as Annex B. */
+#define MIN_VVCC_LENGTH 23
+
+/* Private context of the vvc_mp4toannexb bitstream filter. */
+typedef struct VVCBSFContext {
+    // Size in bytes of the length field in front of every NAL unit of a
+    // packet; set from the return value of vvc_extradata_to_annexb().
+    uint8_t length_size;
+    // Set to 1 once the extradata has been converted; while it is 0 the
+    // filter forwards packets unchanged.
+    int extradata_parsed;
+} VVCBSFContext;
+
+/**
+ * Convert the length-prefixed configuration record in par_in->extradata to
+ * Annex B extradata.
+ *
+ * The optional profile/tier/level part of the record is parsed only to find
+ * the start of the NAL unit arrays; its fields are logged at debug level.
+ * Every NAL unit of the arrays is then copied, preceded by a 4-byte start
+ * code, into a newly allocated, zero-padded buffer that replaces
+ * par_out->extradata.
+ *
+ * @param ctx bitstream filter context; par_in->extradata is read,
+ *            par_out->extradata and par_out->extradata_size are replaced on
+ *            success and left untouched on failure
+ * @return the size in bytes (1 to 4) of the NAL length field announced by the
+ *         record on success, a negative AVERROR code on failure
+ */
+static int vvc_extradata_to_annexb(AVBSFContext *ctx)
+{
+    GetByteContext gb;
+    int length_size, num_arrays, i, j;
+    int ret = 0;
+    int temp = 0;
+    int ptl_present;
+
+    uint8_t *new_extradata = NULL;
+    size_t new_extradata_size = 0;
+
+    int max_picture_width = 0;
+    int max_picture_height = 0;
+    int avg_frame_rate = 0;
+
+    bytestream2_init(&gb, ctx->par_in->extradata, ctx->par_in->extradata_size);
+    // first byte: bits 1-2 hold the length size minus one, bit 0 ptl_present
+    temp = bytestream2_get_byte(&gb);
+    length_size = ((temp & 6) >> 1) + 1;
+    ptl_present = temp & 1;
+    if (ptl_present) {
+        int num_bytes_constraint_info;
+        int general_profile_idc;
+        int general_tier_flag;
+        int general_level_idc;
+        int ptl_frame_only_constraint_flag;
+        int ptl_multi_layer_enabled_flag;
+        int ptl_num_sub_profiles;
+        int temp3, temp4, temp5;
+        int temp2 = bytestream2_get_be16(&gb);
+        int ols_idx = (temp2 >> 7) & 0x1ff;
+        int num_sublayers = (temp2 >> 4) & 0x7;
+        int constant_frame_rate = (temp2 >> 2) & 0x3;
+        int chroma_format_idc = temp2 & 0x3;
+        int bit_depth_minus8 = (bytestream2_get_byte(&gb) >> 5) & 0x7;
+        av_log(ctx, AV_LOG_DEBUG,
+               "bit_depth_minus8 %d chroma_format_idc %d\n", bit_depth_minus8,
+               chroma_format_idc);
+        av_log(ctx, AV_LOG_DEBUG, "constant_frame_rate %d, ols_idx %d\n",
+               constant_frame_rate, ols_idx);
+        // VvcPTLRecord(num_sublayers) native_ptl
+        temp3 = bytestream2_get_byte(&gb);
+        num_bytes_constraint_info = (temp3) & 0x3f;
+        temp4 = bytestream2_get_byte(&gb);
+        general_profile_idc = (temp4 >> 1) & 0x7f;
+        general_tier_flag = (temp4) & 1;
+        general_level_idc = bytestream2_get_byte(&gb);
+        av_log(ctx, AV_LOG_DEBUG,
+               "general_profile_idc %d, general_tier_flag %d, general_level_idc %d, num_sublayers %d num_bytes_constraint_info %d\n",
+               general_profile_idc, general_tier_flag, general_level_idc,
+               num_sublayers, num_bytes_constraint_info);
+
+        // temp5 carries the two flags and the first 6 bits of
+        // general_constraint_info; the loop skips the remaining bytes.
+        temp5 = bytestream2_get_byte(&gb);
+        ptl_frame_only_constraint_flag = (temp5 >> 7) & 0x1;
+        ptl_multi_layer_enabled_flag   = (temp5 >> 6) & 0x1;
+        for (i = 0; i < num_bytes_constraint_info - 1; i++) {
+            // unsigned int(8*num_bytes_constraint_info - 2) general_constraint_info;
+            bytestream2_get_byte(&gb);
+        }
+
+        av_log(ctx, AV_LOG_DEBUG,
+               "ptl_multi_layer_enabled_flag %d, ptl_frame_only_constraint_flag %d\n",
+               ptl_multi_layer_enabled_flag, ptl_frame_only_constraint_flag);
+
+        if (num_sublayers > 1) {
+            int temp6 = bytestream2_get_byte(&gb);
+            uint8_t ptl_sublayer_level_present_flag[8] = { 0 };
+            // The flags are stored from the most significant bit downwards,
+            // starting with index num_sublayers - 2.
+            for (i = num_sublayers - 2; i >= 0; i--) {
+                ptl_sublayer_level_present_flag[i] =
+                    (temp6 >> (7 - (num_sublayers - 2 - i))) & 0x01;
+            }
+            // for (j=num_sublayers; j<=8 && num_sublayers > 1; j++)
+            //     bit(1) ptl_reserved_zero_bit = 0;
+            for (i = num_sublayers - 2; i >= 0; i--) {
+                if (ptl_sublayer_level_present_flag[i]) {
+                    // unsigned int(8) sublayer_level_idc[i];
+                    // The value is not needed, but the byte has to be
+                    // consumed or every following field is misread.
+                    bytestream2_get_byte(&gb);
+                }
+            }
+        }
+
+        ptl_num_sub_profiles = bytestream2_get_byte(&gb);
+        for (j = 0; j < ptl_num_sub_profiles; j++) {
+            // unsigned int(32) general_sub_profile_idc[j];
+            bytestream2_get_be16(&gb);
+            bytestream2_get_be16(&gb);
+        }
+
+        max_picture_width = bytestream2_get_be16(&gb);  // unsigned_int(16) max_picture_width;
+        max_picture_height = bytestream2_get_be16(&gb); // unsigned_int(16) max_picture_height;
+        avg_frame_rate = bytestream2_get_be16(&gb);     // unsigned int(16) avg_frame_rate; }
+        av_log(ctx, AV_LOG_DEBUG,
+               "max_picture_width %d, max_picture_height %d, avg_frame_rate %d\n",
+               max_picture_width, max_picture_height, avg_frame_rate);
+    }
+
+    num_arrays = bytestream2_get_byte(&gb);
+
+    for (i = 0; i < num_arrays; i++) {
+        int cnt;
+        int type = bytestream2_get_byte(&gb) & 0x1f;
+
+        // OPI and DCI arrays carry no count field: exactly one NAL unit.
+        if (type == VVC_OPI_NUT || type == VVC_DCI_NUT)
+            cnt = 1;
+        else
+            cnt = bytestream2_get_be16(&gb);
+
+        av_log(ctx, AV_LOG_DEBUG, "nalu_type %d cnt %d\n", type, cnt);
+
+        if (!(type == VVC_OPI_NUT || type == VVC_DCI_NUT ||
+              type == VVC_VPS_NUT || type == VVC_SPS_NUT || type == VVC_PPS_NUT
+              || type == VVC_PREFIX_SEI_NUT || type == VVC_SUFFIX_SEI_NUT)) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Invalid NAL unit type in extradata: %d\n", type);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        for (j = 0; j < cnt; j++) {
+            int nalu_len = bytestream2_get_be16(&gb);
+
+            // Reject empty NAL units and lengths running past the end of the
+            // record: bytestream2_get_buffer() would copy fewer bytes than
+            // requested and leave uninitialised data in the new extradata.
+            if (!nalu_len ||
+                nalu_len > bytestream2_get_bytes_left(&gb) ||
+                4 + AV_INPUT_BUFFER_PADDING_SIZE + nalu_len >
+                SIZE_MAX - new_extradata_size) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            ret = av_reallocp(&new_extradata, new_extradata_size + nalu_len + 4
+                              + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (ret < 0)
+                goto fail;
+
+            AV_WB32(new_extradata + new_extradata_size, 1); // add the startcode
+            bytestream2_get_buffer(&gb, new_extradata + new_extradata_size + 4,
+                                   nalu_len);
+            new_extradata_size += 4 + nalu_len;
+            // keep the padding after the payload zeroed at every step
+            memset(new_extradata + new_extradata_size, 0,
+                   AV_INPUT_BUFFER_PADDING_SIZE);
+        }
+    }
+
+    av_freep(&ctx->par_out->extradata);
+    ctx->par_out->extradata = new_extradata;
+    ctx->par_out->extradata_size = new_extradata_size;
+
+    if (!new_extradata_size)
+        av_log(ctx, AV_LOG_WARNING, "No parameter sets in the extradata\n");
+
+    return length_size;
+  fail:
+    av_freep(&new_extradata);
+    return ret;
+}
+
+/**
+ * Filter init callback.
+ *
+ * Extradata that is shorter than MIN_VVCC_LENGTH or that begins with a 3- or
+ * 4-byte start code is taken to be Annex B already; nothing is converted and
+ * extradata_parsed is not set, so the filter callback forwards packets
+ * unchanged. Otherwise the extradata is converted and the NAL length-field
+ * size it announces is stored in the private context.
+ *
+ * @return 0 on success, a negative AVERROR code if the conversion fails
+ */
+static int vvc_mp4toannexb_init(AVBSFContext *ctx)
+{
+    VVCBSFContext *priv = ctx->priv_data;
+    const uint8_t *extra = ctx->par_in->extradata;
+    int length_size;
+
+    if (ctx->par_in->extradata_size < MIN_VVCC_LENGTH ||
+        AV_RB24(extra) == 1 || AV_RB32(extra) == 1) {
+        av_log(ctx, AV_LOG_VERBOSE,
+               "The input looks like it is Annex B already\n");
+        return 0;
+    }
+
+    length_size = vvc_extradata_to_annexb(ctx);
+    if (length_size < 0)
+        return length_size;
+
+    priv->length_size      = length_size;
+    priv->extradata_parsed = 1;
+
+    return 0;
+}
+
+/**
+ * Filter callback: rewrite one length-prefixed packet as Annex B.
+ *
+ * When init did not convert the extradata the input packet is moved to the
+ * output unchanged. Otherwise the packet is walked twice: the first pass
+ * looks for a NAL unit whose type lies in VVC_IDR_W_RADL..VVC_RSV_IRAP_11,
+ * the second pass copies every NAL unit to the output behind a 4-byte start
+ * code. If such a unit was found, the converted extradata is written once,
+ * in front of the first NAL unit that is not an AUD.
+ *
+ * @param ctx bitstream filter context
+ * @param out packet receiving the converted data and the input's properties
+ * @return 0 on success, a negative AVERROR code on failure (out is
+ *         unreferenced in that case)
+ */
+static int vvc_mp4toannexb_filter(AVBSFContext *ctx, AVPacket *out)
+{
+    VVCBSFContext *priv = ctx->priv_data;
+    AVPacket *in;
+    GetByteContext reader;
+
+    int has_irap = 0;
+    int extradata_done = 0;
+    int k, ret = 0;
+
+    ret = ff_bsf_get_packet(ctx, &in);
+    if (ret < 0)
+        return ret;
+
+    /* nothing was converted at init time: forward the packet as it is */
+    if (!priv->extradata_parsed) {
+        av_packet_move_ref(out, in);
+        av_packet_free(&in);
+        return 0;
+    }
+
+    bytestream2_init(&reader, in->data, in->size);
+
+    /* first pass: find out whether the packet holds an IRAP NAL unit, as the
+     * extradata has to go in front of any potential PH_NUT */
+    while (bytestream2_get_bytes_left(&reader)) {
+        uint32_t unit_size = 0;
+        int unit_type;
+
+        if (bytestream2_get_bytes_left(&reader) < priv->length_size) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        /* big-endian length field of length_size bytes */
+        for (k = 0; k < priv->length_size; k++)
+            unit_size = (unit_size << 8) | bytestream2_get_byte(&reader);
+
+        if (unit_size < 2 || unit_size > bytestream2_get_bytes_left(&reader)) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        /* the NAL unit type is the top 5 bits of the second header byte */
+        unit_type = (bytestream2_peek_be16(&reader) >> 3) & 0x1f;
+        has_irap  = unit_type >= VVC_IDR_W_RADL && unit_type <= VVC_RSV_IRAP_11;
+        if (has_irap)
+            break;
+
+        bytestream2_seek(&reader, unit_size, SEEK_CUR);
+    }
+
+    /* second pass: rewind and emit every NAL unit behind a start code */
+    bytestream2_seek(&reader, 0, SEEK_SET);
+    while (bytestream2_get_bytes_left(&reader)) {
+        uint32_t unit_size = 0;
+        int unit_type;
+        int prepend, extra_size, write_pos;
+
+        if (bytestream2_get_bytes_left(&reader) < priv->length_size) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        for (k = 0; k < priv->length_size; k++)
+            unit_size = (unit_size << 8) | bytestream2_get_byte(&reader);
+
+        if (unit_size < 2 || unit_size > bytestream2_get_bytes_left(&reader)) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        unit_type = (bytestream2_peek_be16(&reader) >> 3) & 0x1f;
+
+        /* the extradata goes in once, before the first non-AUD NAL unit of a
+         * packet that contains an IRAP */
+        prepend    = has_irap && !extradata_done && unit_type != VVC_AUD_NUT;
+        extra_size = prepend ? ctx->par_out->extradata_size : 0;
+        if (prepend)
+            extradata_done = 1;
+
+        if (4ULL + unit_size + extra_size > FFMIN(INT_MAX, SIZE_MAX)) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        write_pos = out->size;
+
+        ret = av_grow_packet(out, 4 + unit_size + extra_size);
+        if (ret < 0)
+            goto end;
+
+        if (extra_size) {
+            memcpy(out->data + write_pos, ctx->par_out->extradata, extra_size);
+            write_pos += extra_size;
+        }
+        AV_WB32(out->data + write_pos, 1);
+        bytestream2_get_buffer(&reader, out->data + write_pos + 4, unit_size);
+    }
+
+    ret = av_packet_copy_props(out, in);
+
+  end:
+    if (ret < 0)
+        av_packet_unref(out);
+    av_packet_free(&in);
+
+    return ret;
+}
+
+/* Codec IDs listed for the vvc_mp4toannexb filter: VVC only, with
+ * AV_CODEC_ID_NONE as the final entry. */
+static const enum AVCodecID codec_ids[] = {
+    AV_CODEC_ID_VVC, AV_CODEC_ID_NONE,
+};
+
+/* Definition of the "vvc_mp4toannexb" bitstream filter. It has no options
+ * and no close callback; its private context is a VVCBSFContext. */
+const FFBitStreamFilter ff_vvc_mp4toannexb_bsf = {
+    .p.name         = "vvc_mp4toannexb",
+    .p.codec_ids    = codec_ids,
+    .priv_data_size = sizeof(VVCBSFContext),
+    .init           = vvc_mp4toannexb_init,
+    .filter         = vvc_mp4toannexb_filter,
+};