diff mbox

[FFmpeg-devel,v3] hevc_mp4toannexb: Insert correct parameter sets before IRAP

Message ID 20190905031425.hfwdliyowyvgp62d@manj
State Superseded
Headers show

Commit Message

Andriy Gelman Sept. 5, 2019, 3:14 a.m. UTC
Changes in v3:
    Patch 1/2
    - Fixed a bug where rbsp payload (without 0x03) was
      written into packet instead of the raw data (with
      0x03).  
    - Segment packets using ff_h2645_packet_split directly
      from mp4-style format without converting to annexb first.
    - Corrected fate test hash for hevc-bsf-mp4toannexb .

    Patch 2/2
    - Modified fate test so that the output file is
      always overwritten.  


I'm looking for some test data (hevc muxed in mp4) with several temporal layers
where the sub_layer_profile_present_flag[s] is set. Let me know if anyone can
help.

Thanks,
Andriy
From fd53cb5140aaeb71a55686b419c3e57ddebf5eb5 Mon Sep 17 00:00:00 2001
From: Andriy Gelman <andriy.gelman@gmail.com>
Date: Sun, 18 Aug 2019 21:48:43 -0400
Subject: [PATCH 1/2] hevc_mp4toannexb: Insert correct parameter sets before
 IRAP

Fixes #7799

Currently, the mp4toannexb filter always inserts the same extradata at
the start of the first IRAP unit. As in ticket #7799, this can lead to
decoding errors if modified parameter sets are signalled in-band.

This commit keeps track of the vps/sps/pps parameter sets during the
conversion. The correct combination is inserted at the start of the
first IRAP unit instead of the original extradata. SEI prefix nal units
are also cached and inserted after the parameter sets.

This commit also makes an update to the hevc-bsf-mp4toannexb fate
test since the result before this patch contained duplicate vps/sps/pps
parameter sets in-band.
---
 libavcodec/hevc_mp4toannexb_bsf.c | 394 +++++++++++++++++++++++++++---
 tests/fate/hevc.mak               |   2 +-
 2 files changed, 356 insertions(+), 40 deletions(-)

Comments

Andreas Rheinhardt Sept. 8, 2019, 2:18 p.m. UTC | #1
Andriy Gelman:
> Changes in v3:
>     Patch 1/2
>     - Fixed a bug where rbsp payload (without 0x03) was
>       written into packet instead of the raw data (with
>       0x03).  
>     - Segment packets using ff_h2645_packet_split directly
>       from mp4-style format without converting to annexb first.
>     - Corrected fate test hash for hevc-bsf-mp4toannexb .
> 
>     Patch 2/2
>     - Modified fate test so that the output file is
>       always overwritten.  
> 
> 
> I'm looking for some test data (hevc muxed in mp4) with several temporal layers
> where the sub_layer_profile_present_flag[s] is set. Let me know if anyone can
> help.
> 
> Thanks,
> Andriy
> 
Hello,

> diff --git a/libavcodec/hevc_mp4toannexb_bsf.c
b/libavcodec/hevc_mp4toannexb_bsf.c
> index 09bce5b34c..11ff262a92 100644
> --- a/libavcodec/hevc_mp4toannexb_bsf.c
> +++ b/libavcodec/hevc_mp4toannexb_bsf.c
> @@ -28,14 +28,185 @@
>  #include "bsf.h"
>  #include "bytestream.h"
>  #include "hevc.h"
> -
> -#define MIN_HEVCC_LENGTH 23
> +#include "h2645_parse.h"
> +#include "hevc_ps.h"
> +#include "golomb.h"
> +
> +#define MIN_HEVCC_LENGTH            23
> +#define PROFILE_WITHOUT_IDC_BITS    88
> +#define IS_IRAP(s)                  ((s)->type >= 16 && (s)->type
<= 23)
> +#define IS_PARAMSET(s)              ((s)->type >= 32 && (s)->type
<= 34)
> +
> +#define WRITE_NAL(pkt, prev_size, in_buffer) do {
         \
> +    AV_WB32((pkt)->data + (prev_size), 1);
         \
> +    prev_size += 4;
         \
> +    memcpy((pkt)->data + (prev_size), (in_buffer)->data,
(in_buffer)->size); \
> +    prev_size += (in_buffer)->size;
         \
> +} while (0)
> +
> +typedef struct Param {
> +    AVBufferRef *raw_data; /*store a copy of the raw data to
construct extradata*/
> +    int ref;  /*stores the ref of the higher level parameter set*/
> +} Param;
> +
> +/*modified version of HEVCParamSets to store bytestream and
reference to previous level*/
> +typedef struct ParamSets {
> +    Param vps_list[HEVC_MAX_VPS_COUNT];
> +    Param sps_list[HEVC_MAX_SPS_COUNT];
> +    Param pps_list[HEVC_MAX_PPS_COUNT];
> +
> +    AVBufferRef *sei_prefix;
> +} ParamSets;
>
>  typedef struct HEVCBSFContext {
> -    uint8_t  length_size;
> -    int      extradata_parsed;
> +    uint8_t      length_size;
> +    int          extradata_parsed;
> +    ParamSets    ps; /*make own of version of HEVCParamSets store
copy of the bytestream*/
>  } HEVCBSFContext;
>
> +
> +static int parse_vps(AVBSFContext* ctx, H2645NAL *nal)

Inconsistent placement of *.

> +{
> +    int vps_id = 0;

Unnecessary initialization.

> +    Param *param_ptr;
> +
> +    HEVCBSFContext *s = ctx->priv_data;
> +    ParamSets *ps     = &s->ps;
> +
> +    GetBitContext *gb = &nal->gb;
> +    int nal_size      = nal->raw_size;
> +
> +    vps_id = get_bits(gb, 4);
> +    if (vps_id >= HEVC_MAX_VPS_COUNT) {
> +        av_log(ctx, AV_LOG_ERROR, "VPS id out of range: %d\n", vps_id);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    param_ptr = &ps->vps_list[vps_id];
> +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> +        av_log(ctx, AV_LOG_DEBUG, "Parsed VPS id: %d. Copy already
exists in parameter set\n", vps_id);
> +    } else {
> +        AVBufferRef *vps_buf = av_buffer_allocz(nal_size);

There is no need to zero it as you overwrite the whole buffer a few
lines below. This goes for SPS and PPS as well.
Moreover, the very same code for updating the extradata basically
exists three times. How about factoring it out?

> +        if (!vps_buf)
> +            return AVERROR(ENOMEM);
> +
> +        /*copy raw data into vps_buf buffer and replace existing
copy in ps*/
> +        memcpy(vps_buf->data, nal->raw_data, nal_size);
> +        av_buffer_unref(&param_ptr->raw_data);
> +        param_ptr->raw_data = vps_buf;
> +    }
> +    return 0;
> +}
> +
> +static int parse_sps(AVBSFContext *ctx, H2645NAL *nal)
> +{
> +    int sps_id, vps_ref, max_sub_layers_minus1;
> +    int i;
> +    Param *param_ptr;
> +
> +    HEVCBSFContext *s = ctx->priv_data;
> +    ParamSets *ps     = &s->ps;
> +
> +    GetBitContext *gb = &nal->gb;
> +    int nal_size      = nal->raw_size;
> +
> +    uint8_t sub_layer_profile_present_flag[HEVC_MAX_SUB_LAYERS];
> +    uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];
> +
> +    vps_ref = get_bits(gb, 4);
> +    if (vps_ref >= HEVC_MAX_VPS_COUNT) {
> +        av_log(ctx, AV_LOG_ERROR, "VPS id out of range: %d\n",
vps_ref);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    max_sub_layers_minus1 = get_bits(gb, 3);
> +    skip_bits1(gb);
/*sps_temporal_id_nesting_flag*/
> +    skip_bits(gb, PROFILE_WITHOUT_IDC_BITS); /*profile_tier_level*/
> +    skip_bits(gb, 8);                        /*general_level_idc*/
> +
> +    for (i = 0; i < max_sub_layers_minus1; ++i) {
> +        sub_layer_profile_present_flag[i] = get_bits1(gb);
> +        sub_layer_level_present_flag[i]   = get_bits1(gb);
> +    }
> +
> +    if (max_sub_layers_minus1 > 0)
> +        for (i = max_sub_layers_minus1; i < 8; i++)
> +            skip_bits(gb, 2); /*reserved_zero_2bits[i]*/
> +
> +    for (i = 0; i < max_sub_layers_minus1; ++i) {
> +        if (sub_layer_profile_present_flag[i])
> +            skip_bits(gb, PROFILE_WITHOUT_IDC_BITS);
/*profile_tier_level*/
> +        if (sub_layer_level_present_flag[i])
> +            skip_bits(gb, 8);
/*sub_layer_level_idc*/
> +    }
> +
> +    /*we only need the sps_id index*/
> +    sps_id = get_ue_golomb_long(gb);

get_ue_golomb_long returns unsigned. If the return value is so big
that it is no longer representable in an int (like sps_id), it will be
negative and the check below will not catch this.
Furthermore, get_ue_golomb_long is not a really safe function to begin
with: E.g. if there are 32 zero bits in a row, get_ue_golomb_long will
treat it as if there were 31 zero bits followed by the actual value +1
coded on the next 32 bits. And there is no check for end-of-bitstream.

These remarks apply to parse_pps as well.

> +    if (sps_id >= HEVC_MAX_SPS_COUNT) {
> +        av_log(ctx, AV_LOG_ERROR, "SPS id out of range: %d\n", sps_id);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    param_ptr = &ps->sps_list[sps_id];
> +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> +        av_log(ctx, AV_LOG_DEBUG, "Parsed SPS id: %d. Copy already
exists in parameter set\n", sps_id);
> +    } else {
> +        AVBufferRef *sps_buf = av_buffer_allocz(nal_size);
> +        if (!sps_buf)
> +            return AVERROR(ENOMEM);
> +
> +        /*copy raw data into vps_buf buffer and replace existing
copy in ps*/
> +        memcpy(sps_buf->data, nal->raw_data, nal_size);
> +        av_buffer_unref(&param_ptr->raw_data);
> +        param_ptr->raw_data = sps_buf;
> +        param_ptr->ref      = vps_ref;
> +    }
> +    return 0;
> +}
> +
> +static int parse_pps(AVBSFContext *ctx, H2645NAL *nal)
> +{
> +    int pps_id, sps_ref;
> +    Param *param_ptr;
> +
> +    HEVCBSFContext *s = ctx->priv_data;
> +    ParamSets *ps     = &s->ps;
> +
> +    GetBitContext *gb = &nal->gb;
> +    int nal_size      = nal->raw_size;
> +
> +    pps_id = get_ue_golomb_long(gb);
> +    if (pps_id >= HEVC_MAX_PPS_COUNT) {
> +        av_log(ctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    sps_ref = get_ue_golomb_long(gb);
> +    if (sps_ref >= HEVC_MAX_SPS_COUNT) {
> +        av_log(ctx, AV_LOG_ERROR, "SPS id out of range: %d\n",
sps_ref);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    param_ptr = &ps->pps_list[pps_id];
> +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> +        av_log(ctx, AV_LOG_DEBUG, "Parsed PPS id: %d. Copy already
exists in parameter set\n", pps_id);
> +    } else {
> +        AVBufferRef *pps_buf = av_buffer_allocz(nal_size);
> +        if (!pps_buf)
> +            return AVERROR(ENOMEM);
> +
> +        /*copy raw data into vps_buf buffer and replace existing
copy in ps*/
> +        memcpy(pps_buf->data, nal->raw_data, nal_size);
> +        av_buffer_unref(&param_ptr->raw_data);
> +        param_ptr->raw_data = pps_buf;
> +        param_ptr->ref      = sps_ref;
> +    }
> +    return 0;
> +}
> +
>  static int hevc_extradata_to_annexb(AVBSFContext *ctx)
>  {
>      GetByteContext gb;
> @@ -94,10 +265,45 @@ fail:
>      return ret;
>  }
>
> +static int update_sei(AVBufferRef **sei, H2645NAL *nal)
> +{
> +    AVBufferRef *tmp;
> +
> +    av_buffer_unref(sei);
> +    tmp = av_buffer_alloc(nal->raw_size);
> +    if (!tmp)
> +        return AVERROR(ENOMEM);
> +
> +    memcpy(tmp->data, nal->raw_data, nal->raw_size);
> +    *sei = tmp;
> +
> +    return 0;
> +}
> +
> +static int update_paramset(AVBSFContext *ctx, H2645NAL *nal)
> +{
> +    int ret;
> +    switch (nal->type) {
> +        case (HEVC_NAL_VPS):
> +            if (ret = parse_vps(ctx, nal) < 0)

< has higher precedence than =, so that the above will be read as "if
(ret = (parse_vps(ctx, nal) < 0))", i.e. ret will be 0 or 1, so use
parentheses.

> +                return ret;
> +            break;
> +        case (HEVC_NAL_SPS):
> +            if (ret = parse_sps(ctx, nal) < 0)
> +                return ret;
> +            break;
> +        case (HEVC_NAL_PPS):
> +            if (ret = parse_pps(ctx, nal) < 0)
> +                return ret;
> +    }
> +    return 0;
> +}
> +
>  static int hevc_mp4toannexb_init(AVBSFContext *ctx)
>  {
>      HEVCBSFContext *s = ctx->priv_data;
> -    int ret;
> +    H2645Packet pkt;
> +    int i, ret = 0;
>
>      if (ctx->par_in->extradata_size < MIN_HEVCC_LENGTH ||
>          AV_RB24(ctx->par_in->extradata) == 1           ||
> @@ -110,7 +316,114 @@ static int hevc_mp4toannexb_init(AVBSFContext
*ctx)
>              return ret;
>          s->length_size      = ret;
>          s->extradata_parsed = 1;
> +
> +        memset(&pkt, 0, sizeof(H2645Packet));
> +        ret = ff_h2645_packet_split(&pkt, ctx->par_out->extradata,
ctx->par_out->extradata_size,
> +                                     ctx, 0, 0, AV_CODEC_ID_HEVC,
1, 0);
> +        if (ret < 0)
> +            goto done;
> +
> +        for (i = 0; i < pkt.nb_nals; ++i) {
> +            H2645NAL *nal = &pkt.nals[i];
> +
> +            /*current segmentation algorithm includes next 0x00
from next nal unit*/

You could use ff_h2645_packet_split to split the extradata as well.
See cbs_h2645_split_fragment in cbs_h2645.c for an example of this.

> +            if (nal->raw_data[nal->raw_size - 1] == 0x00)
> +                nal->raw_size--;
> +
> +            if (IS_PARAMSET(nal)) {
> +                ret = update_paramset(ctx, nal);
> +                if (ret < 0)
> +                    goto done;
> +                continue;
> +            }
> +
> +            if (nal->type == HEVC_NAL_SEI_PREFIX) {
> +                ret = update_sei(&s->ps.sei_prefix, nal);
> +                if (ret < 0)
> +                    goto done;
> +            }
> +        }
> +    }
> +
> +done:
> +    ff_h2645_packet_uninit(&pkt);

You are calling this even if the input looks like annex b in which
case pkt is uninitialized.

> +    return ret;
> +}
> +
> +static void ps_uninit(ParamSets* ps)

Uncommon placement of *.

> +{
> +    int i;
> +    for (i = 0; i < FF_ARRAY_ELEMS(ps->vps_list); i++)
> +       av_buffer_unref(&ps->vps_list[i].raw_data);
> +    for (i = 0; i < FF_ARRAY_ELEMS(ps->sps_list); i++)
> +       av_buffer_unref(&ps->sps_list[i].raw_data);
> +    for (i = 0; i < FF_ARRAY_ELEMS(ps->pps_list); i++)
> +       av_buffer_unref(&ps->pps_list[i].raw_data);
> +
> +    av_buffer_unref(&ps->sei_prefix);
> +}
> +
> +static void hevc_mp4toannexb_close(AVBSFContext *ctx)
> +{
> +    HEVCBSFContext *s = (HEVCBSFContext*)ctx->priv_data;
> +    ps_uninit(&s->ps);
> +}
> +
> +static int write_extradata(AVBSFContext *ctx, AVPacket *pkt_out,
H2645NAL *nal_irap)
> +{
> +    int ref, ret, prev_size;
> +    int new_extradata_size = 0;
> +
> +    HEVCBSFContext *s = (HEVCBSFContext*)ctx->priv_data;

ctx->priv_data is a pointer to void, so that the cast is unnecessary.

> +    ParamSets *ps     = &s->ps;
> +    GetBitContext *gb = &nal_irap->gb;
> +
> +    /* currently active pps parameter set */
> +    const Param *vps;
> +    const Param *sps;
> +    const Param *pps;
> +
> +    skip_bits1(gb); /*first_slice_ic_pic_flag*/
> +    skip_bits1(gb); /*no_output_of_prior_pics_flag*/
> +
> +    ref = get_ue_golomb_long(gb);
> +    if (ref >= HEVC_MAX_PPS_COUNT || !ps->pps_list[ref].raw_data) {
> +        av_log(ctx, AV_LOG_ERROR, "Invalid PPS: %d\n", ref);
> +        return AVERROR_INVALIDDATA;
> +    }
> +    pps                 = &ps->pps_list[ref];
> +    new_extradata_size += pps->raw_data->size + 4;
> +    ref                 = pps->ref;
> +
> +    if (ref >= HEVC_MAX_SPS_COUNT || !ps->sps_list[ref].raw_data) {
> +        av_log(ctx, AV_LOG_ERROR, "Invalid SPS: %d\n", ref);
> +        return AVERROR_INVALIDDATA;
>      }
> +    sps                 = &ps->sps_list[ref];
> +    new_extradata_size += sps->raw_data->size + 4;
> +    ref                 = sps->ref;
> +
> +    if (ref >= HEVC_MAX_VPS_COUNT || !ps->vps_list[ref].raw_data) {
> +        av_log(ctx, AV_LOG_ERROR, "Invalid VPS: %d\n", ref);
> +        return AVERROR_INVALIDDATA;
> +    }
> +    vps                 = &ps->vps_list[ref];
> +    new_extradata_size += vps->raw_data->size + 4;
> +
> +    if (ps->sei_prefix)
> +        new_extradata_size += ps->sei_prefix->size + 4;
> +
> +    prev_size = pkt_out->size;
> +    ret = av_grow_packet(pkt_out, new_extradata_size);
> +    if (ret < 0)
> +        return AVERROR(ENOMEM);

Just forward the error.

> +
> +    WRITE_NAL(pkt_out, prev_size, vps->raw_data);
> +    WRITE_NAL(pkt_out, prev_size, sps->raw_data);
> +    WRITE_NAL(pkt_out, prev_size, pps->raw_data);
> +
> +    if (ps->sei_prefix)
> +        WRITE_NAL(pkt_out, prev_size, ps->sei_prefix);
>
>      return 0;
>  }
> @@ -119,62 +432,64 @@ static int
hevc_mp4toannexb_filter(AVBSFContext *ctx, AVPacket *out)
>  {
>      HEVCBSFContext *s = ctx->priv_data;
>      AVPacket *in;
> -    GetByteContext gb;
> -
> -    int got_irap = 0;
> -    int i, ret = 0;
> +    H2645Packet pkt;
> +    int i, prev_size;
> +    int ret = 0;
> +    int first_irap = 0;
>
>      ret = ff_bsf_get_packet(ctx, &in);
>      if (ret < 0)
>          return ret;
>
> +    /*output the annexb nalu if extradata is not parsed*/
>      if (!s->extradata_parsed) {
>          av_packet_move_ref(out, in);
>          av_packet_free(&in);
>          return 0;
>      }
>
> -    bytestream2_init(&gb, in->data, in->size);
> +    memset(&pkt, 0, sizeof(H2645Packet));
> +    ret = ff_h2645_packet_split(&pkt, in->data, in->size, ctx, 1,
s->length_size, AV_CODEC_ID_HEVC, 1, 0);
> +    if (ret < 0)
> +        goto done;
>
> -    while (bytestream2_get_bytes_left(&gb)) {
> -        uint32_t nalu_size = 0;
> -        int      nalu_type;
> -        int is_irap, add_extradata, extra_size, prev_size;
> +    for (i = 0; i < pkt.nb_nals; ++i) {
>
> -        for (i = 0; i < s->length_size; i++)
> -            nalu_size = (nalu_size << 8) | bytestream2_get_byte(&gb);
> +        H2645NAL *nal = &pkt.nals[i];
>
> -        nalu_type = (bytestream2_peek_byte(&gb) >> 1) & 0x3f;
> +        if (IS_PARAMSET(nal)) {
> +            ret = update_paramset(ctx, nal);
> +            if (ret < 0)
> +                goto done;
> +            continue;
> +        }
>
> -        /* prepend extradata to IRAP frames */
> -        is_irap       = nalu_type >= 16 && nalu_type <= 23;
> -        add_extradata = is_irap && !got_irap;
> -        extra_size    = add_extradata * ctx->par_out->extradata_size;
> -        got_irap     |= is_irap;
> +        if (nal->type == HEVC_NAL_SEI_PREFIX) {
> +            ret = update_sei(&s->ps.sei_prefix, nal);
> +            if (ret < 0)
> +                goto done;
> +            continue;
> +        }
>
> -        if (SIZE_MAX - nalu_size < 4 ||
> -            SIZE_MAX - 4 - nalu_size < extra_size) {
> -            ret = AVERROR_INVALIDDATA;
> -            goto fail;
> +        if (!first_irap && IS_IRAP(nal)) {
> +            ret = write_extradata(ctx, out, nal);
> +            if (ret < 0)
> +                goto done;
> +            first_irap = 1;
>          }
>
> +        /*write to output packet*/
>          prev_size = out->size;
> -
> -        ret = av_grow_packet(out, 4 + nalu_size + extra_size);
> -        if (ret < 0)
> -            goto fail;
> -
> -        if (add_extradata)
> -            memcpy(out->data + prev_size, ctx->par_out->extradata,
extra_size);
> -        AV_WB32(out->data + prev_size + extra_size, 1);
> -        bytestream2_get_buffer(&gb, out->data + prev_size + 4 +
extra_size, nalu_size);
> +        av_grow_packet(out, nal->raw_size + 4);
> +        AV_WB32(out->data + prev_size, 1);
> +        prev_size += 4;
> +        memcpy(out->data + prev_size, nal->raw_data, nal->raw_size);
> +        prev_size += nal->raw_size;
>      }
>
> -    ret = av_packet_copy_props(out, in);
> -    if (ret < 0)
> -        goto fail;

You should not remove this; without it, the packet will be a naked
(refcounted) buffer without any other information like timing.

> +done:
> +    ff_h2645_packet_uninit(&pkt);
>
> -fail:
>      if (ret < 0)
>          av_packet_unref(out);
>      av_packet_free(&in);
> @@ -190,6 +505,7 @@ const AVBitStreamFilter ff_hevc_mp4toannexb_bsf = {
>      .name           = "hevc_mp4toannexb",
>      .priv_data_size = sizeof(HEVCBSFContext),
>      .init           = hevc_mp4toannexb_init,
> +    .close          = hevc_mp4toannexb_close,
>      .filter         = hevc_mp4toannexb_filter,
>      .codec_ids      = codec_ids,
>  };

General remarks:
1. I fail to see why you use refcounted buffers for the parameter
sets. Their reference count is always 1 anyway. If you add a size
field to Param, you should be able to remove the AVBufferRef.
2. The handling of SEI NAL units is not good at all:
a) There may be multiple SEI NAL units in front of a picture; you keep
only the last one.
b) You only insert an SEI NAL unit in front of an IRAP picture.
c) And if the current picture is an IRAP picture, there is no
guarantee that an SEI that is inserted is actually from the current
access unit and not from an earlier access unit. Maybe it doesn't
apply to this access unit at all.
3. PPS units are allowed to change from one access unit to the next;
yet the current code removes any parameter sets from the units and
only inserts them in front of IRAP units, so that the output stream
only contains the status of the parameter sets from the moment
directly before IRAP pictures.

- Andreas
Andriy Gelman Sept. 8, 2019, 3:30 p.m. UTC | #2
Hey Andreas, 

Thanks for reviewing. 
I've actually just finished the updated version that removes emulation 0x03 bytes 
from the relevant nal units only (as you suggested in the previous review). The
other nal units are just copied over.

I'll add your changes to that version

On Sun, 08. Sep 14:18, Andreas Rheinhardt wrote:
> Andriy Gelman:
> > Changes in v3:
> >     Patch 1/2
> >     - Fixed a bug where rbsp payload (without 0x03) was
> >       written into packet instead of the raw data (with
> >       0x03).  
> >     - Segment packets using ff_h2645_packet_split directly
> >       from mp4-style format without converting to annexb first.
> >     - Corrected fate test hash for hevc-bsf-mp4toannexb .
> > 
> >     Patch 2/2
> >     - Modified fate test so that the output file is
> >       always overwritten.  
> > 
> > 
> > I'm looking for some test data (hevc muxed in mp4) with several temporal layers
> > where the sub_layer_profile_present_flag[s] is set. Let me know if anyone can
> > help.
> > 
> > Thanks,
> > Andriy
> > 
> Hello,
> 
> > diff --git a/libavcodec/hevc_mp4toannexb_bsf.c
> b/libavcodec/hevc_mp4toannexb_bsf.c
> > index 09bce5b34c..11ff262a92 100644
> > --- a/libavcodec/hevc_mp4toannexb_bsf.c
> > +++ b/libavcodec/hevc_mp4toannexb_bsf.c
> > @@ -28,14 +28,185 @@
> >  #include "bsf.h"
> >  #include "bytestream.h"
> >  #include "hevc.h"
> > -
> > -#define MIN_HEVCC_LENGTH 23
> > +#include "h2645_parse.h"
> > +#include "hevc_ps.h"
> > +#include "golomb.h"
> > +
> > +#define MIN_HEVCC_LENGTH            23
> > +#define PROFILE_WITHOUT_IDC_BITS    88
> > +#define IS_IRAP(s)                  ((s)->type >= 16 && (s)->type
> <= 23)
> > +#define IS_PARAMSET(s)              ((s)->type >= 32 && (s)->type
> <= 34)
> > +
> > +#define WRITE_NAL(pkt, prev_size, in_buffer) do {
>          \
> > +    AV_WB32((pkt)->data + (prev_size), 1);
>          \
> > +    prev_size += 4;
>          \
> > +    memcpy((pkt)->data + (prev_size), (in_buffer)->data,
> (in_buffer)->size); \
> > +    prev_size += (in_buffer)->size;
>          \
> > +} while (0)
> > +
> > +typedef struct Param {
> > +    AVBufferRef *raw_data; /*store a copy of the raw data to
> construct extradata*/
> > +    int ref;  /*stores the ref of the higher level parameter set*/
> > +} Param;
> > +
> > +/*modified version of HEVCParamSets to store bytestream and
> reference to previous level*/
> > +typedef struct ParamSets {
> > +    Param vps_list[HEVC_MAX_VPS_COUNT];
> > +    Param sps_list[HEVC_MAX_SPS_COUNT];
> > +    Param pps_list[HEVC_MAX_PPS_COUNT];
> > +
> > +    AVBufferRef *sei_prefix;
> > +} ParamSets;
> >
> >  typedef struct HEVCBSFContext {
> > -    uint8_t  length_size;
> > -    int      extradata_parsed;
> > +    uint8_t      length_size;
> > +    int          extradata_parsed;
> > +    ParamSets    ps; /*make own of version of HEVCParamSets store
> copy of the bytestream*/
> >  } HEVCBSFContext;
> >
> > +
> > +static int parse_vps(AVBSFContext* ctx, H2645NAL *nal)
> 

> Inconsistent placement of *.
> 
> > +{
> > +    int vps_id = 0;
> 
> Unnecessary initialization.

Will remove initialization and change * position.

> 
> > +    Param *param_ptr;
> > +
> > +    HEVCBSFContext *s = ctx->priv_data;
> > +    ParamSets *ps     = &s->ps;
> > +
> > +    GetBitContext *gb = &nal->gb;
> > +    int nal_size      = nal->raw_size;
> > +
> > +    vps_id = get_bits(gb, 4);
> > +    if (vps_id >= HEVC_MAX_VPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "VPS id out of range: %d\n", vps_id);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    param_ptr = &ps->vps_list[vps_id];
> > +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> > +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> > +        av_log(ctx, AV_LOG_DEBUG, "Parsed VPS id: %d. Copy already
> exists in parameter set\n", vps_id);
> > +    } else {
> > +        AVBufferRef *vps_buf = av_buffer_allocz(nal_size);

> 
> There is no need to zero it as you overwrite the whole buffer a few
> lines below. This goes for SPS and PPS as well.
> Moreover, the very same code for updating the extradata basically
> exists three times. How about factoring it out?

ok, I'll refactor this part.

> 
> > +        if (!vps_buf)
> > +            return AVERROR(ENOMEM);
> > +
> > +        /*copy raw data into vps_buf buffer and replace existing
> copy in ps*/
> > +        memcpy(vps_buf->data, nal->raw_data, nal_size);
> > +        av_buffer_unref(&param_ptr->raw_data);
> > +        param_ptr->raw_data = vps_buf;
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int parse_sps(AVBSFContext *ctx, H2645NAL *nal)
> > +{
> > +    int sps_id, vps_ref, max_sub_layers_minus1;
> > +    int i;
> > +    Param *param_ptr;
> > +
> > +    HEVCBSFContext *s = ctx->priv_data;
> > +    ParamSets *ps     = &s->ps;
> > +
> > +    GetBitContext *gb = &nal->gb;
> > +    int nal_size      = nal->raw_size;
> > +
> > +    uint8_t sub_layer_profile_present_flag[HEVC_MAX_SUB_LAYERS];
> > +    uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];
> > +
> > +    vps_ref = get_bits(gb, 4);
> > +    if (vps_ref >= HEVC_MAX_VPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "VPS id out of range: %d\n",
> vps_ref);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    max_sub_layers_minus1 = get_bits(gb, 3);
> > +    skip_bits1(gb);
> /*sps_temporal_id_nesting_flag*/
> > +    skip_bits(gb, PROFILE_WITHOUT_IDC_BITS); /*profile_tier_level*/
> > +    skip_bits(gb, 8);                        /*general_level_idc*/
> > +
> > +    for (i = 0; i < max_sub_layers_minus1; ++i) {
> > +        sub_layer_profile_present_flag[i] = get_bits1(gb);
> > +        sub_layer_level_present_flag[i]   = get_bits1(gb);
> > +    }
> > +
> > +    if (max_sub_layers_minus1 > 0)
> > +        for (i = max_sub_layers_minus1; i < 8; i++)
> > +            skip_bits(gb, 2); /*reserved_zero_2bits[i]*/
> > +
> > +    for (i = 0; i < max_sub_layers_minus1; ++i) {
> > +        if (sub_layer_profile_present_flag[i])
> > +            skip_bits(gb, PROFILE_WITHOUT_IDC_BITS);
> /*profile_tier_level*/
> > +        if (sub_layer_level_present_flag[i])
> > +            skip_bits(gb, 8);
> /*sub_layer_level_idc*/
> > +    }
> > +
> > +    /*we only need the sps_id index*/
> > +    sps_id = get_ue_golomb_long(gb);

> 
> get_ue_golomb_long returns unsigned. If the return value is so big
> that it is no longer representable in an int (like sps_id), it will be
> negative and the check below will not catch this.

Thanks, I'll add the check below.

> Furthermore, get_ue_golomb_long is not a really safe function to begin
> with: E.g. if there are 32 zero bits in a row, get_ue_golomb_long will
> treat it as if there were 31 zero bits followed by the actual value +1
> coded on the next 32 bits. And there is no check for end-of-bitstream.

Probably outside the scope of this patch. 
It should be addressed in another commit?

> 
> These remarks apply to parse_pps as well.
> 
> > +    if (sps_id >= HEVC_MAX_SPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "SPS id out of range: %d\n", sps_id);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    param_ptr = &ps->sps_list[sps_id];
> > +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> > +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> > +        av_log(ctx, AV_LOG_DEBUG, "Parsed SPS id: %d. Copy already
> exists in parameter set\n", sps_id);
> > +    } else {
> > +        AVBufferRef *sps_buf = av_buffer_allocz(nal_size);
> > +        if (!sps_buf)
> > +            return AVERROR(ENOMEM);
> > +
> > +        /*copy raw data into vps_buf buffer and replace existing
> copy in ps*/
> > +        memcpy(sps_buf->data, nal->raw_data, nal_size);
> > +        av_buffer_unref(&param_ptr->raw_data);
> > +        param_ptr->raw_data = sps_buf;
> > +        param_ptr->ref      = vps_ref;
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int parse_pps(AVBSFContext *ctx, H2645NAL *nal)
> > +{
> > +    int pps_id, sps_ref;
> > +    Param *param_ptr;
> > +
> > +    HEVCBSFContext *s = ctx->priv_data;
> > +    ParamSets *ps     = &s->ps;
> > +
> > +    GetBitContext *gb = &nal->gb;
> > +    int nal_size      = nal->raw_size;
> > +
> > +    pps_id = get_ue_golomb_long(gb);
> > +    if (pps_id >= HEVC_MAX_PPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    sps_ref = get_ue_golomb_long(gb);
> > +    if (sps_ref >= HEVC_MAX_SPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "SPS id out of range: %d\n",
> sps_ref);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    param_ptr = &ps->pps_list[pps_id];
> > +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> > +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> > +        av_log(ctx, AV_LOG_DEBUG, "Parsed PPS id: %d. Copy already
> exists in parameter set\n", pps_id);
> > +    } else {
> > +        AVBufferRef *pps_buf = av_buffer_allocz(nal_size);
> > +        if (!pps_buf)
> > +            return AVERROR(ENOMEM);
> > +
> > +        /*copy raw data into vps_buf buffer and replace existing
> copy in ps*/
> > +        memcpy(pps_buf->data, nal->raw_data, nal_size);
> > +        av_buffer_unref(&param_ptr->raw_data);
> > +        param_ptr->raw_data = pps_buf;
> > +        param_ptr->ref      = sps_ref;
> > +    }
> > +    return 0;
> > +}
> > +
> >  static int hevc_extradata_to_annexb(AVBSFContext *ctx)
> >  {
> >      GetByteContext gb;
> > @@ -94,10 +265,45 @@ fail:
> >      return ret;
> >  }
> >
> > +static int update_sei(AVBufferRef **sei, H2645NAL *nal)
> > +{
> > +    AVBufferRef *tmp;
> > +
> > +    av_buffer_unref(sei);
> > +    tmp = av_buffer_alloc(nal->raw_size);
> > +    if (!tmp)
> > +        return AVERROR(ENOMEM);
> > +
> > +    memcpy(tmp->data, nal->raw_data, nal->raw_size);
> > +    *sei = tmp;
> > +
> > +    return 0;
> > +}
> > +
> > +static int update_paramset(AVBSFContext *ctx, H2645NAL *nal)
> > +{
> > +    int ret;
> > +    switch (nal->type) {
> > +        case (HEVC_NAL_VPS):
> > +            if (ret = parse_vps(ctx, nal) < 0)

> 
> < has higher precedence than =, so that the above will be read as "if
> (ret = (parse_vps(ctx, nal) < 0))", i.e. ret will be 0 or 1, so use
> parentheses.

Thanks, I'll change this.

> 
> > +                return ret;
> > +            break;
> > +        case (HEVC_NAL_SPS):
> > +            if (ret = parse_sps(ctx, nal) < 0)
> > +                return ret;
> > +            break;
> > +        case (HEVC_NAL_PPS):
> > +            if (ret = parse_pps(ctx, nal) < 0)
> > +                return ret;
> > +    }
> > +    return 0;
> > +}
> > +
> >  static int hevc_mp4toannexb_init(AVBSFContext *ctx)
> >  {
> >      HEVCBSFContext *s = ctx->priv_data;
> > -    int ret;
> > +    H2645Packet pkt;
> > +    int i, ret = 0;
> >
> >      if (ctx->par_in->extradata_size < MIN_HEVCC_LENGTH ||
> >          AV_RB24(ctx->par_in->extradata) == 1           ||
> > @@ -110,7 +316,114 @@ static int hevc_mp4toannexb_init(AVBSFContext
> *ctx)
> >              return ret;
> >          s->length_size      = ret;
> >          s->extradata_parsed = 1;
> > +
> > +        memset(&pkt, 0, sizeof(H2645Packet));
> > +        ret = ff_h2645_packet_split(&pkt, ctx->par_out->extradata,
> ctx->par_out->extradata_size,
> > +                                     ctx, 0, 0, AV_CODEC_ID_HEVC,
> 1, 0);
> > +        if (ret < 0)
> > +            goto done;
> > +
> > +        for (i = 0; i < pkt.nb_nals; ++i) {
> > +            H2645NAL *nal = &pkt.nals[i];
> > +
> > +            /*current segmentation algorithm includes next 0x00
> from next nal unit*/
> 
> You could use ff_h2645_packet_split to split the extradata as well.
> See cbs_h2645_split_fragment in cbs_h2645.c for an example of this.

ok, thanks, I'll look over cbs_h2645_split_fragment.

> 
> > +            if (nal->raw_data[nal->raw_size - 1] == 0x00)
> > +                nal->raw_size--;
> > +
> > +            if (IS_PARAMSET(nal)) {
> > +                ret = update_paramset(ctx, nal);
> > +                if (ret < 0)
> > +                    goto done;
> > +                continue;
> > +            }
> > +
> > +            if (nal->type == HEVC_NAL_SEI_PREFIX) {
> > +                ret = update_sei(&s->ps.sei_prefix, nal);
> > +                if (ret < 0)
> > +                    goto done;
> > +            }
> > +        }
> > +    }
> > +
> > +done:
> > +    ff_h2645_packet_uninit(&pkt);
> 
> You are calling this even if the input looks like annex b in which
> case pkt is uninitialized.

Will fix.

> 
> > +    return ret;
> > +}
> > +
> > +static void ps_uninit(ParamSets* ps)
> 
> Uncommon placement of *.
> 
> > +{
> > +    int i;
> > +    for (i = 0; i < FF_ARRAY_ELEMS(ps->vps_list); i++)
> > +       av_buffer_unref(&ps->vps_list[i].raw_data);
> > +    for (i = 0; i < FF_ARRAY_ELEMS(ps->sps_list); i++)
> > +       av_buffer_unref(&ps->sps_list[i].raw_data);
> > +    for (i = 0; i < FF_ARRAY_ELEMS(ps->pps_list); i++)
> > +       av_buffer_unref(&ps->pps_list[i].raw_data);
> > +
> > +    av_buffer_unref(&ps->sei_prefix);
> > +}
> > +
> > +static void hevc_mp4toannexb_close(AVBSFContext *ctx)
> > +{
> > +    HEVCBSFContext *s = (HEVCBSFContext*)ctx->priv_data;
> > +    ps_uninit(&s->ps);
> > +}
> > +
> > +static int write_extradata(AVBSFContext *ctx, AVPacket *pkt_out,
> H2645NAL *nal_irap)
> > +{
> > +    int ref, ret, prev_size;
> > +    int new_extradata_size = 0;
> > +
> > +    HEVCBSFContext *s = (HEVCBSFContext*)ctx->priv_data;
> 
> ctx->priv_data is a pointer to void, so that the cast is unnecessary.

ok

> 
> > +    ParamSets *ps     = &s->ps;
> > +    GetBitContext *gb = &nal_irap->gb;
> > +
> > +    /* currently active pps parameter set */
> > +    const Param *vps;
> > +    const Param *sps;
> > +    const Param *pps;
> > +
> > +    skip_bits1(gb); /*first_slice_ic_pic_flag*/
> > +    skip_bits1(gb); /*no_output_of_prior_pics_flag*/
> > +
> > +    ref = get_ue_golomb_long(gb);
> > +    if (ref >= HEVC_MAX_PPS_COUNT || !ps->pps_list[ref].raw_data) {
> > +        av_log(ctx, AV_LOG_ERROR, "Invalid PPS: %d\n", ref);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +    pps                 = &ps->pps_list[ref];
> > +    new_extradata_size += pps->raw_data->size + 4;
> > +    ref                 = pps->ref;
> > +
> > +    if (ref >= HEVC_MAX_SPS_COUNT || !ps->sps_list[ref].raw_data) {
> > +        av_log(ctx, AV_LOG_ERROR, "Invalid SPS: %d\n", ref);
> > +        return AVERROR_INVALIDDATA;
> >      }
> > +    sps                 = &ps->sps_list[ref];
> > +    new_extradata_size += sps->raw_data->size + 4;
> > +    ref                 = sps->ref;
> > +
> > +    if (ref >= HEVC_MAX_VPS_COUNT || !ps->vps_list[ref].raw_data) {
> > +        av_log(ctx, AV_LOG_ERROR, "Invalid VPS: %d\n", ref);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +    vps                 = &ps->vps_list[ref];
> > +    new_extradata_size += vps->raw_data->size + 4;
> > +
> > +    if (ps->sei_prefix)
> > +        new_extradata_size += ps->sei_prefix->size + 4;
> > +
> > +    prev_size = pkt_out->size;
> > +    ret = av_grow_packet(pkt_out, new_extradata_size);
> > +    if (ret < 0)
> > +        return AVERROR(ENOMEM);
> 

> Just forward the error.

ok

> 
> > +
> > +    WRITE_NAL(pkt_out, prev_size, vps->raw_data);
> > +    WRITE_NAL(pkt_out, prev_size, sps->raw_data);
> > +    WRITE_NAL(pkt_out, prev_size, pps->raw_data);
> > +
> > +    if (ps->sei_prefix)
> > +        WRITE_NAL(pkt_out, prev_size, ps->sei_prefix);
> >
> >      return 0;
> >  }
> > @@ -119,62 +432,64 @@ static int
> hevc_mp4toannexb_filter(AVBSFContext *ctx, AVPacket *out)
> >  {
> >      HEVCBSFContext *s = ctx->priv_data;
> >      AVPacket *in;
> > -    GetByteContext gb;
> > -
> > -    int got_irap = 0;
> > -    int i, ret = 0;
> > +    H2645Packet pkt;
> > +    int i, prev_size;
> > +    int ret = 0;
> > +    int first_irap = 0;
> >
> >      ret = ff_bsf_get_packet(ctx, &in);
> >      if (ret < 0)
> >          return ret;
> >
> > +    /*output the annexb nalu if extradata is not parsed*/
> >      if (!s->extradata_parsed) {
> >          av_packet_move_ref(out, in);
> >          av_packet_free(&in);
> >          return 0;
> >      }
> >
> > -    bytestream2_init(&gb, in->data, in->size);
> > +    memset(&pkt, 0, sizeof(H2645Packet));
> > +    ret = ff_h2645_packet_split(&pkt, in->data, in->size, ctx, 1,
> s->length_size, AV_CODEC_ID_HEVC, 1, 0);
> > +    if (ret < 0)
> > +        goto done;
> >
> > -    while (bytestream2_get_bytes_left(&gb)) {
> > -        uint32_t nalu_size = 0;
> > -        int      nalu_type;
> > -        int is_irap, add_extradata, extra_size, prev_size;
> > +    for (i = 0; i < pkt.nb_nals; ++i) {
> >
> > -        for (i = 0; i < s->length_size; i++)
> > -            nalu_size = (nalu_size << 8) | bytestream2_get_byte(&gb);
> > +        H2645NAL *nal = &pkt.nals[i];
> >
> > -        nalu_type = (bytestream2_peek_byte(&gb) >> 1) & 0x3f;
> > +        if (IS_PARAMSET(nal)) {
> > +            ret = update_paramset(ctx, nal);
> > +            if (ret < 0)
> > +                goto done;
> > +            continue;
> > +        }
> >
> > -        /* prepend extradata to IRAP frames */
> > -        is_irap       = nalu_type >= 16 && nalu_type <= 23;
> > -        add_extradata = is_irap && !got_irap;
> > -        extra_size    = add_extradata * ctx->par_out->extradata_size;
> > -        got_irap     |= is_irap;
> > +        if (nal->type == HEVC_NAL_SEI_PREFIX) {
> > +            ret = update_sei(&s->ps.sei_prefix, nal);
> > +            if (ret < 0)
> > +                goto done;
> > +            continue;
> > +        }
> >
> > -        if (SIZE_MAX - nalu_size < 4 ||
> > -            SIZE_MAX - 4 - nalu_size < extra_size) {
> > -            ret = AVERROR_INVALIDDATA;
> > -            goto fail;
> > +        if (!first_irap && IS_IRAP(nal)) {
> > +            ret = write_extradata(ctx, out, nal);
> > +            if (ret < 0)
> > +                goto done;
> > +            first_irap = 1;
> >          }
> >
> > +        /*write to output packet*/
> >          prev_size = out->size;
> > -
> > -        ret = av_grow_packet(out, 4 + nalu_size + extra_size);
> > -        if (ret < 0)
> > -            goto fail;
> > -
> > -        if (add_extradata)
> > -            memcpy(out->data + prev_size, ctx->par_out->extradata,
> extra_size);
> > -        AV_WB32(out->data + prev_size + extra_size, 1);
> > -        bytestream2_get_buffer(&gb, out->data + prev_size + 4 +
> extra_size, nalu_size);
> > +        av_grow_packet(out, nal->raw_size + 4);
> > +        AV_WB32(out->data + prev_size, 1);
> > +        prev_size += 4;
> > +        memcpy(out->data + prev_size, nal->raw_data, nal->raw_size);
> > +        prev_size += nal->raw_size;
> >      }
> >
> > -    ret = av_packet_copy_props(out, in);
> > -    if (ret < 0)
> > -        goto fail;

> 
> You should not remove this; without it, the packet will be a naked
> (refcounted) buffer without any other information like timing.
> 

thanks, will fix this. 

> > +done:
> > +    ff_h2645_packet_uninit(&pkt);
> >
> > -fail:
> >      if (ret < 0)
> >          av_packet_unref(out);
> >      av_packet_free(&in);
> > @@ -190,6 +505,7 @@ const AVBitStreamFilter ff_hevc_mp4toannexb_bsf = {
> >      .name           = "hevc_mp4toannexb",
> >      .priv_data_size = sizeof(HEVCBSFContext),
> >      .init           = hevc_mp4toannexb_init,
> > +    .close          = hevc_mp4toannexb_close,
> >      .filter         = hevc_mp4toannexb_filter,
> >      .codec_ids      = codec_ids,
> >  };
> 
> General remarks:

> 1. I fail to see why you use refcounted buffers for the parameter
> sets. Their reference count is always 1 anyway. If you add a size
> field to Param, you should be able to remove the AVBufferRef.

ok, will fix.

> 2. The handling of SEI NAL units is not good at all:
> a) There may be multiple SEI NAL units in front of a picture; you keep
> only the last one.

I'll look over the specs on SEI.

> b) You only insert an SEI NAL unit in front of an IRAP picture.

Are you suggesting parsing the SEIs and inserting them if they are
referenced by the nal unit? 

> c) And if the current picture is an IRAP picture, there is no
> guarantee that an SEI that is inserted is actually from the current
> access unit and not from an earlier access unit. Maybe it doesn't
> apply to this access unit at all.


> 3. PPS units are allowed to change from one access unit to the next;
> yet the current code removes any parameter sets from the units and
> only inserts them in front of IRAP units, so that the output stream
> only contains the status of the parameter sets from the moment
> directly before IRAP pictures.

ok, I'll change this. 

> 
> - Andreas

Thanks,
Andriy
Andriy Gelman Sept. 9, 2019, 2:34 a.m. UTC | #3
On Sun, 08. Sep 14:18, Andreas Rheinhardt wrote:
> Andriy Gelman:
> > Changes in v3:
> >     Patch 1/2
> >     - Fixed a bug where rbsp payload (without 0x03) was
> >       written into packet instead of the raw data (with
> >       0x03).  
> >     - Segment packets using ff_h2645_packet_split directly
> >       from mp4-style format without converting to annexb first.
> >     - Corrected fate test hash for hevc-bsf-mp4toannexb .
> > 
> >     Patch 2/2
> >     - Modified fate test so that the output file is
> >       always overwritten.  
> > 
> > 
> > I'm looking for some test data (hevc muxed in mp4) with several temporal layers
> > where the sub_layer_profile_present_flag[s] is set. Let me know if anyone can
> > help.
> > 
> > Thanks,
> > Andriy
> > 
> Hello,
> 
> > diff --git a/libavcodec/hevc_mp4toannexb_bsf.c
> b/libavcodec/hevc_mp4toannexb_bsf.c
> > index 09bce5b34c..11ff262a92 100644
> > --- a/libavcodec/hevc_mp4toannexb_bsf.c
> > +++ b/libavcodec/hevc_mp4toannexb_bsf.c
> > @@ -28,14 +28,185 @@
> >  #include "bsf.h"
> >  #include "bytestream.h"
> >  #include "hevc.h"
> > -
> > -#define MIN_HEVCC_LENGTH 23
> > +#include "h2645_parse.h"
> > +#include "hevc_ps.h"
> > +#include "golomb.h"
> > +
> > +#define MIN_HEVCC_LENGTH            23
> > +#define PROFILE_WITHOUT_IDC_BITS    88
> > +#define IS_IRAP(s)                  ((s)->type >= 16 && (s)->type
> <= 23)
> > +#define IS_PARAMSET(s)              ((s)->type >= 32 && (s)->type
> <= 34)
> > +
> > +#define WRITE_NAL(pkt, prev_size, in_buffer) do {
>          \
> > +    AV_WB32((pkt)->data + (prev_size), 1);
>          \
> > +    prev_size += 4;
>          \
> > +    memcpy((pkt)->data + (prev_size), (in_buffer)->data,
> (in_buffer)->size); \
> > +    prev_size += (in_buffer)->size;
>          \
> > +} while (0)
> > +
> > +typedef struct Param {
> > +    AVBufferRef *raw_data; /*store a copy of the raw data to
> construct extradata*/
> > +    int ref;  /*stores the ref of the higher level parameter set*/
> > +} Param;
> > +
> > +/*modified version of HEVCParamSets to store bytestream and
> reference to previous level*/
> > +typedef struct ParamSets {
> > +    Param vps_list[HEVC_MAX_VPS_COUNT];
> > +    Param sps_list[HEVC_MAX_SPS_COUNT];
> > +    Param pps_list[HEVC_MAX_PPS_COUNT];
> > +
> > +    AVBufferRef *sei_prefix;
> > +} ParamSets;
> >
> >  typedef struct HEVCBSFContext {
> > -    uint8_t  length_size;
> > -    int      extradata_parsed;
> > +    uint8_t      length_size;
> > +    int          extradata_parsed;
> > +    ParamSets    ps; /*make own of version of HEVCParamSets store
> copy of the bytestream*/
> >  } HEVCBSFContext;
> >
> > +
> > +static int parse_vps(AVBSFContext* ctx, H2645NAL *nal)
> 
> Inconsistent placement of *.
> 
> > +{
> > +    int vps_id = 0;
> 
> Unnecessary initialization.
> 
> > +    Param *param_ptr;
> > +
> > +    HEVCBSFContext *s = ctx->priv_data;
> > +    ParamSets *ps     = &s->ps;
> > +
> > +    GetBitContext *gb = &nal->gb;
> > +    int nal_size      = nal->raw_size;
> > +
> > +    vps_id = get_bits(gb, 4);
> > +    if (vps_id >= HEVC_MAX_VPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "VPS id out of range: %d\n", vps_id);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    param_ptr = &ps->vps_list[vps_id];
> > +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> > +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> > +        av_log(ctx, AV_LOG_DEBUG, "Parsed VPS id: %d. Copy already
> exists in parameter set\n", vps_id);
> > +    } else {
> > +        AVBufferRef *vps_buf = av_buffer_allocz(nal_size);
> 
> There is no need to zero it as you overwrite the whole buffer a few
> lines below. This goes for SPS and PPS as well.
> Moreover, the very same code for updating the extradata basically
> exists three times. How about factoring it out?
> 
> > +        if (!vps_buf)
> > +            return AVERROR(ENOMEM);
> > +
> > +        /*copy raw data into vps_buf buffer and replace existing
> copy in ps*/
> > +        memcpy(vps_buf->data, nal->raw_data, nal_size);
> > +        av_buffer_unref(&param_ptr->raw_data);
> > +        param_ptr->raw_data = vps_buf;
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int parse_sps(AVBSFContext *ctx, H2645NAL *nal)
> > +{
> > +    int sps_id, vps_ref, max_sub_layers_minus1;
> > +    int i;
> > +    Param *param_ptr;
> > +
> > +    HEVCBSFContext *s = ctx->priv_data;
> > +    ParamSets *ps     = &s->ps;
> > +
> > +    GetBitContext *gb = &nal->gb;
> > +    int nal_size      = nal->raw_size;
> > +
> > +    uint8_t sub_layer_profile_present_flag[HEVC_MAX_SUB_LAYERS];
> > +    uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];
> > +
> > +    vps_ref = get_bits(gb, 4);
> > +    if (vps_ref >= HEVC_MAX_VPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "VPS id out of range: %d\n",
> vps_ref);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    max_sub_layers_minus1 = get_bits(gb, 3);
> > +    skip_bits1(gb);
> /*sps_temporal_id_nesting_flag*/
> > +    skip_bits(gb, PROFILE_WITHOUT_IDC_BITS); /*profile_tier_level*/
> > +    skip_bits(gb, 8);                        /*general_level_idc*/
> > +
> > +    for (i = 0; i < max_sub_layers_minus1; ++i) {
> > +        sub_layer_profile_present_flag[i] = get_bits1(gb);
> > +        sub_layer_level_present_flag[i]   = get_bits1(gb);
> > +    }
> > +
> > +    if (max_sub_layers_minus1 > 0)
> > +        for (i = max_sub_layers_minus1; i < 8; i++)
> > +            skip_bits(gb, 2); /*reserved_zero_2bits[i]*/
> > +
> > +    for (i = 0; i < max_sub_layers_minus1; ++i) {
> > +        if (sub_layer_profile_present_flag[i])
> > +            skip_bits(gb, PROFILE_WITHOUT_IDC_BITS);
> /*profile_tier_level*/
> > +        if (sub_layer_level_present_flag[i])
> > +            skip_bits(gb, 8);
> /*sub_layer_level_idc*/
> > +    }
> > +
> > +    /*we only need the sps_id index*/
> > +    sps_id = get_ue_golomb_long(gb);
> 
> get_ue_golomb_long returns unsigned. If the return value is so big
> that it is no longer representable in an int (like sps_id), it will be
> negative and the check below will not catch this.
> Furthermore, get_ue_golomb_long is not a really safe function to begin
> with: E.g. if there are 32 zero bits in a row, get_ue_golomb_long will
> treat it as if there were 31 zero bits followed by the actual value +1
> coded on the next 32 bits. And there is no check for end-of-bitstream.
> 
> These remarks apply to parse_pps as well.
> 
> > +    if (sps_id >= HEVC_MAX_SPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "SPS id out of range: %d\n", sps_id);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    param_ptr = &ps->sps_list[sps_id];
> > +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> > +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> > +        av_log(ctx, AV_LOG_DEBUG, "Parsed SPS id: %d. Copy already
> exists in parameter set\n", sps_id);
> > +    } else {
> > +        AVBufferRef *sps_buf = av_buffer_allocz(nal_size);
> > +        if (!sps_buf)
> > +            return AVERROR(ENOMEM);
> > +
> > +        /*copy raw data into vps_buf buffer and replace existing
> copy in ps*/
> > +        memcpy(sps_buf->data, nal->raw_data, nal_size);
> > +        av_buffer_unref(&param_ptr->raw_data);
> > +        param_ptr->raw_data = sps_buf;
> > +        param_ptr->ref      = vps_ref;
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int parse_pps(AVBSFContext *ctx, H2645NAL *nal)
> > +{
> > +    int pps_id, sps_ref;
> > +    Param *param_ptr;
> > +
> > +    HEVCBSFContext *s = ctx->priv_data;
> > +    ParamSets *ps     = &s->ps;
> > +
> > +    GetBitContext *gb = &nal->gb;
> > +    int nal_size      = nal->raw_size;
> > +
> > +    pps_id = get_ue_golomb_long(gb);
> > +    if (pps_id >= HEVC_MAX_PPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    sps_ref = get_ue_golomb_long(gb);
> > +    if (sps_ref >= HEVC_MAX_SPS_COUNT) {
> > +        av_log(ctx, AV_LOG_ERROR, "SPS id out of range: %d\n",
> sps_ref);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +
> > +    param_ptr = &ps->pps_list[pps_id];
> > +    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
> > +        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
> > +        av_log(ctx, AV_LOG_DEBUG, "Parsed PPS id: %d. Copy already
> exists in parameter set\n", pps_id);
> > +    } else {
> > +        AVBufferRef *pps_buf = av_buffer_allocz(nal_size);
> > +        if (!pps_buf)
> > +            return AVERROR(ENOMEM);
> > +
> > +        /*copy raw data into vps_buf buffer and replace existing
> copy in ps*/
> > +        memcpy(pps_buf->data, nal->raw_data, nal_size);
> > +        av_buffer_unref(&param_ptr->raw_data);
> > +        param_ptr->raw_data = pps_buf;
> > +        param_ptr->ref      = sps_ref;
> > +    }
> > +    return 0;
> > +}
> > +
> >  static int hevc_extradata_to_annexb(AVBSFContext *ctx)
> >  {
> >      GetByteContext gb;
> > @@ -94,10 +265,45 @@ fail:
> >      return ret;
> >  }
> >
> > +static int update_sei(AVBufferRef **sei, H2645NAL *nal)
> > +{
> > +    AVBufferRef *tmp;
> > +
> > +    av_buffer_unref(sei);
> > +    tmp = av_buffer_alloc(nal->raw_size);
> > +    if (!tmp)
> > +        return AVERROR(ENOMEM);
> > +
> > +    memcpy(tmp->data, nal->raw_data, nal->raw_size);
> > +    *sei = tmp;
> > +
> > +    return 0;
> > +}
> > +
> > +static int update_paramset(AVBSFContext *ctx, H2645NAL *nal)
> > +{
> > +    int ret;
> > +    switch (nal->type) {
> > +        case (HEVC_NAL_VPS):
> > +            if (ret = parse_vps(ctx, nal) < 0)
> 
> < has higher precedence than =, so that the above will be read as "if
> (ret = (parse_vps(ctx, nal) < 0))", i.e. ret will be 0 or 1, so use
> parentheses.
> 
> > +                return ret;
> > +            break;
> > +        case (HEVC_NAL_SPS):
> > +            if (ret = parse_sps(ctx, nal) < 0)
> > +                return ret;
> > +            break;
> > +        case (HEVC_NAL_PPS):
> > +            if (ret = parse_pps(ctx, nal) < 0)
> > +                return ret;
> > +    }
> > +    return 0;
> > +}
> > +
> >  static int hevc_mp4toannexb_init(AVBSFContext *ctx)
> >  {
> >      HEVCBSFContext *s = ctx->priv_data;
> > -    int ret;
> > +    H2645Packet pkt;
> > +    int i, ret = 0;
> >
> >      if (ctx->par_in->extradata_size < MIN_HEVCC_LENGTH ||
> >          AV_RB24(ctx->par_in->extradata) == 1           ||
> > @@ -110,7 +316,114 @@ static int hevc_mp4toannexb_init(AVBSFContext
> *ctx)
> >              return ret;
> >          s->length_size      = ret;
> >          s->extradata_parsed = 1;
> > +
> > +        memset(&pkt, 0, sizeof(H2645Packet));
> > +        ret = ff_h2645_packet_split(&pkt, ctx->par_out->extradata,
> ctx->par_out->extradata_size,
> > +                                     ctx, 0, 0, AV_CODEC_ID_HEVC,
> 1, 0);
> > +        if (ret < 0)
> > +            goto done;
> > +
> > +        for (i = 0; i < pkt.nb_nals; ++i) {
> > +            H2645NAL *nal = &pkt.nals[i];
> > +
> > +            /*current segmentation algorithm includes next 0x00
> from next nal unit*/
> 
> You could use ff_h2645_packet_split to split the extradata as well.
> See cbs_h2645_split_fragment in cbs_h2645.c for an example of this.
> 
> > +            if (nal->raw_data[nal->raw_size - 1] == 0x00)
> > +                nal->raw_size--;
> > +
> > +            if (IS_PARAMSET(nal)) {
> > +                ret = update_paramset(ctx, nal);
> > +                if (ret < 0)
> > +                    goto done;
> > +                continue;
> > +            }
> > +
> > +            if (nal->type == HEVC_NAL_SEI_PREFIX) {
> > +                ret = update_sei(&s->ps.sei_prefix, nal);
> > +                if (ret < 0)
> > +                    goto done;
> > +            }
> > +        }
> > +    }
> > +
> > +done:
> > +    ff_h2645_packet_uninit(&pkt);
> 
> You are calling this even if the input looks like annex b in which
> case pkt is uninitialized.
> 
> > +    return ret;
> > +}
> > +
> > +static void ps_uninit(ParamSets* ps)
> 
> Uncommon placement of *.
> 
> > +{
> > +    int i;
> > +    for (i = 0; i < FF_ARRAY_ELEMS(ps->vps_list); i++)
> > +       av_buffer_unref(&ps->vps_list[i].raw_data);
> > +    for (i = 0; i < FF_ARRAY_ELEMS(ps->sps_list); i++)
> > +       av_buffer_unref(&ps->sps_list[i].raw_data);
> > +    for (i = 0; i < FF_ARRAY_ELEMS(ps->pps_list); i++)
> > +       av_buffer_unref(&ps->pps_list[i].raw_data);
> > +
> > +    av_buffer_unref(&ps->sei_prefix);
> > +}
> > +
> > +static void hevc_mp4toannexb_close(AVBSFContext *ctx)
> > +{
> > +    HEVCBSFContext *s = (HEVCBSFContext*)ctx->priv_data;
> > +    ps_uninit(&s->ps);
> > +}
> > +
> > +static int write_extradata(AVBSFContext *ctx, AVPacket *pkt_out,
> H2645NAL *nal_irap)
> > +{
> > +    int ref, ret, prev_size;
> > +    int new_extradata_size = 0;
> > +
> > +    HEVCBSFContext *s = (HEVCBSFContext*)ctx->priv_data;
> 
> ctx->priv_data is a pointer to void, so that the cast is unnecessary.
> 
> > +    ParamSets *ps     = &s->ps;
> > +    GetBitContext *gb = &nal_irap->gb;
> > +
> > +    /* currently active pps parameter set */
> > +    const Param *vps;
> > +    const Param *sps;
> > +    const Param *pps;
> > +
> > +    skip_bits1(gb); /*first_slice_ic_pic_flag*/
> > +    skip_bits1(gb); /*no_output_of_prior_pics_flag*/
> > +
> > +    ref = get_ue_golomb_long(gb);
> > +    if (ref >= HEVC_MAX_PPS_COUNT || !ps->pps_list[ref].raw_data) {
> > +        av_log(ctx, AV_LOG_ERROR, "Invalid PPS: %d\n", ref);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +    pps                 = &ps->pps_list[ref];
> > +    new_extradata_size += pps->raw_data->size + 4;
> > +    ref                 = pps->ref;
> > +
> > +    if (ref >= HEVC_MAX_SPS_COUNT || !ps->sps_list[ref].raw_data) {
> > +        av_log(ctx, AV_LOG_ERROR, "Invalid SPS: %d\n", ref);
> > +        return AVERROR_INVALIDDATA;
> >      }
> > +    sps                 = &ps->sps_list[ref];
> > +    new_extradata_size += sps->raw_data->size + 4;
> > +    ref                 = sps->ref;
> > +
> > +    if (ref >= HEVC_MAX_VPS_COUNT || !ps->vps_list[ref].raw_data) {
> > +        av_log(ctx, AV_LOG_ERROR, "Invalid VPS: %d\n", ref);
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +    vps                 = &ps->vps_list[ref];
> > +    new_extradata_size += vps->raw_data->size + 4;
> > +
> > +    if (ps->sei_prefix)
> > +        new_extradata_size += ps->sei_prefix->size + 4;
> > +
> > +    prev_size = pkt_out->size;
> > +    ret = av_grow_packet(pkt_out, new_extradata_size);
> > +    if (ret < 0)
> > +        return AVERROR(ENOMEM);
> 
> Just forward the error.
> 
> > +
> > +    WRITE_NAL(pkt_out, prev_size, vps->raw_data);
> > +    WRITE_NAL(pkt_out, prev_size, sps->raw_data);
> > +    WRITE_NAL(pkt_out, prev_size, pps->raw_data);
> > +
> > +    if (ps->sei_prefix)
> > +        WRITE_NAL(pkt_out, prev_size, ps->sei_prefix);
> >
> >      return 0;
> >  }
> > @@ -119,62 +432,64 @@ static int
> hevc_mp4toannexb_filter(AVBSFContext *ctx, AVPacket *out)
> >  {
> >      HEVCBSFContext *s = ctx->priv_data;
> >      AVPacket *in;
> > -    GetByteContext gb;
> > -
> > -    int got_irap = 0;
> > -    int i, ret = 0;
> > +    H2645Packet pkt;
> > +    int i, prev_size;
> > +    int ret = 0;
> > +    int first_irap = 0;
> >
> >      ret = ff_bsf_get_packet(ctx, &in);
> >      if (ret < 0)
> >          return ret;
> >
> > +    /*output the annexb nalu if extradata is not parsed*/
> >      if (!s->extradata_parsed) {
> >          av_packet_move_ref(out, in);
> >          av_packet_free(&in);
> >          return 0;
> >      }
> >
> > -    bytestream2_init(&gb, in->data, in->size);
> > +    memset(&pkt, 0, sizeof(H2645Packet));
> > +    ret = ff_h2645_packet_split(&pkt, in->data, in->size, ctx, 1,
> s->length_size, AV_CODEC_ID_HEVC, 1, 0);
> > +    if (ret < 0)
> > +        goto done;
> >
> > -    while (bytestream2_get_bytes_left(&gb)) {
> > -        uint32_t nalu_size = 0;
> > -        int      nalu_type;
> > -        int is_irap, add_extradata, extra_size, prev_size;
> > +    for (i = 0; i < pkt.nb_nals; ++i) {
> >
> > -        for (i = 0; i < s->length_size; i++)
> > -            nalu_size = (nalu_size << 8) | bytestream2_get_byte(&gb);
> > +        H2645NAL *nal = &pkt.nals[i];
> >
> > -        nalu_type = (bytestream2_peek_byte(&gb) >> 1) & 0x3f;
> > +        if (IS_PARAMSET(nal)) {
> > +            ret = update_paramset(ctx, nal);
> > +            if (ret < 0)
> > +                goto done;
> > +            continue;
> > +        }
> >
> > -        /* prepend extradata to IRAP frames */
> > -        is_irap       = nalu_type >= 16 && nalu_type <= 23;
> > -        add_extradata = is_irap && !got_irap;
> > -        extra_size    = add_extradata * ctx->par_out->extradata_size;
> > -        got_irap     |= is_irap;
> > +        if (nal->type == HEVC_NAL_SEI_PREFIX) {
> > +            ret = update_sei(&s->ps.sei_prefix, nal);
> > +            if (ret < 0)
> > +                goto done;
> > +            continue;
> > +        }
> >
> > -        if (SIZE_MAX - nalu_size < 4 ||
> > -            SIZE_MAX - 4 - nalu_size < extra_size) {
> > -            ret = AVERROR_INVALIDDATA;
> > -            goto fail;
> > +        if (!first_irap && IS_IRAP(nal)) {
> > +            ret = write_extradata(ctx, out, nal);
> > +            if (ret < 0)
> > +                goto done;
> > +            first_irap = 1;
> >          }
> >
> > +        /*write to output packet*/
> >          prev_size = out->size;
> > -
> > -        ret = av_grow_packet(out, 4 + nalu_size + extra_size);
> > -        if (ret < 0)
> > -            goto fail;
> > -
> > -        if (add_extradata)
> > -            memcpy(out->data + prev_size, ctx->par_out->extradata,
> extra_size);
> > -        AV_WB32(out->data + prev_size + extra_size, 1);
> > -        bytestream2_get_buffer(&gb, out->data + prev_size + 4 +
> extra_size, nalu_size);
> > +        av_grow_packet(out, nal->raw_size + 4);
> > +        AV_WB32(out->data + prev_size, 1);
> > +        prev_size += 4;
> > +        memcpy(out->data + prev_size, nal->raw_data, nal->raw_size);
> > +        prev_size += nal->raw_size;
> >      }
> >
> > -    ret = av_packet_copy_props(out, in);
> > -    if (ret < 0)
> > -        goto fail;
> 
> You should not remove this; without it, the packet will be a naked
> (refcounted) buffer without any other information like timing.
> 
> > +done:
> > +    ff_h2645_packet_uninit(&pkt);
> >
> > -fail:
> >      if (ret < 0)
> >          av_packet_unref(out);
> >      av_packet_free(&in);
> > @@ -190,6 +505,7 @@ const AVBitStreamFilter ff_hevc_mp4toannexb_bsf = {
> >      .name           = "hevc_mp4toannexb",
> >      .priv_data_size = sizeof(HEVCBSFContext),
> >      .init           = hevc_mp4toannexb_init,
> > +    .close          = hevc_mp4toannexb_close,
> >      .filter         = hevc_mp4toannexb_filter,
> >      .codec_ids      = codec_ids,
> >  };
> 
> General remarks:
> 1. I fail to see why you use refcounted buffers for the parameter
> sets. Their reference count is always 1 anyway. If you add a size
> field to Param, you should be able to remove the AVBufferRef.

> 2. The handling of SEI NAL units is not good at all:
> a) There may be multiple SEI NAL units in front of a picture; you keep
> only the last one.
> b) You only insert an SEI NAL unit in front of an IRAP picture.
> c) And if the current picture is an IRAP picture, there is no
> guarantee that an SEI that is inserted is actually from the current
> access unit and not from an earlier access unit. Maybe it doesn't
> apply to this access unit at all.

How about I use the same approach as the original code?
Make a copy of the SEI from extradata and always insert that version at the
start of irap. Any other SEIs can just be written to the output as before.

I'd like to look into properly parsing the SEI in the future, but I feel it
should be a separate commit.

> 3. PPS units are allowed to change from one access unit to the next;
> yet the current code removes any parameter sets from the units and
> only inserts them in front of IRAP units, so that the output stream
> only contains the status of the parameter sets from the moment
> directly before IRAP pictures.

I changed the code to write all parameter sets to the output if the access unit
doesn't have an irap nal.

Andriy
diff mbox

Patch

diff --git a/libavcodec/hevc_mp4toannexb_bsf.c b/libavcodec/hevc_mp4toannexb_bsf.c
index 09bce5b34c..11ff262a92 100644
--- a/libavcodec/hevc_mp4toannexb_bsf.c
+++ b/libavcodec/hevc_mp4toannexb_bsf.c
@@ -28,14 +28,185 @@ 
 #include "bsf.h"
 #include "bytestream.h"
 #include "hevc.h"
-
-#define MIN_HEVCC_LENGTH 23
+#include "h2645_parse.h"
+#include "hevc_ps.h"
+#include "golomb.h"
+
+#define MIN_HEVCC_LENGTH            23
+#define PROFILE_WITHOUT_IDC_BITS    88
+#define IS_IRAP(s)                  ((s)->type >= 16 && (s)->type <= 23)
+#define IS_PARAMSET(s)              ((s)->type >= 32 && (s)->type <= 34)
+
+#define WRITE_NAL(pkt, prev_size, in_buffer) do {                            \
+    AV_WB32((pkt)->data + (prev_size), 1);                                   \
+    prev_size += 4;                                                          \
+    memcpy((pkt)->data + (prev_size), (in_buffer)->data, (in_buffer)->size); \
+    prev_size += (in_buffer)->size;                                          \
+} while (0)
+
+typedef struct Param {
+    AVBufferRef *raw_data; /*store a copy of the raw data to construct extradata*/
+    int ref;  /*stores the ref of the higher level parameter set*/
+} Param;
+
+/*modified version of HEVCParamSets to store bytestream and reference to previous level*/
+typedef struct ParamSets {
+    Param vps_list[HEVC_MAX_VPS_COUNT];
+    Param sps_list[HEVC_MAX_SPS_COUNT];
+    Param pps_list[HEVC_MAX_PPS_COUNT];
+
+    AVBufferRef *sei_prefix;
+} ParamSets;
 
 typedef struct HEVCBSFContext {
-    uint8_t  length_size;
-    int      extradata_parsed;
+    uint8_t      length_size;
+    int          extradata_parsed;
+    ParamSets    ps; /*make own of version of HEVCParamSets store copy of the bytestream*/
 } HEVCBSFContext;
 
+
+static int parse_vps(AVBSFContext* ctx, H2645NAL *nal)
+{
+    int vps_id = 0;
+    Param *param_ptr;
+
+    HEVCBSFContext *s = ctx->priv_data;
+    ParamSets *ps     = &s->ps;
+
+    GetBitContext *gb = &nal->gb;
+    int nal_size      = nal->raw_size;
+
+    vps_id = get_bits(gb, 4);
+    if (vps_id >= HEVC_MAX_VPS_COUNT) {
+        av_log(ctx, AV_LOG_ERROR, "VPS id out of range: %d\n", vps_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    param_ptr = &ps->vps_list[vps_id];
+    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
+        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
+        av_log(ctx, AV_LOG_DEBUG, "Parsed VPS id: %d. Copy already exists in parameter set\n", vps_id);
+    } else {
+        AVBufferRef *vps_buf = av_buffer_allocz(nal_size);
+        if (!vps_buf)
+            return AVERROR(ENOMEM);
+
+        /*copy raw data into vps_buf buffer and replace existing copy in ps*/
+        memcpy(vps_buf->data, nal->raw_data, nal_size);
+        av_buffer_unref(&param_ptr->raw_data);
+        param_ptr->raw_data = vps_buf;
+    }
+    return 0;
+}
+
+static int parse_sps(AVBSFContext *ctx, H2645NAL *nal)
+{
+    int sps_id, vps_ref, max_sub_layers_minus1;
+    int i;
+    Param *param_ptr;
+
+    HEVCBSFContext *s = ctx->priv_data;
+    ParamSets *ps     = &s->ps;
+
+    GetBitContext *gb = &nal->gb;
+    int nal_size      = nal->raw_size;
+
+    uint8_t sub_layer_profile_present_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];
+
+    vps_ref = get_bits(gb, 4);
+    if (vps_ref >= HEVC_MAX_VPS_COUNT) {
+        av_log(ctx, AV_LOG_ERROR, "VPS id out of range: %d\n", vps_ref);
+        return AVERROR_INVALIDDATA;
+    }
+
+    max_sub_layers_minus1 = get_bits(gb, 3);
+    skip_bits1(gb);                          /*sps_temporal_id_nesting_flag*/
+    skip_bits(gb, PROFILE_WITHOUT_IDC_BITS); /*profile_tier_level*/
+    skip_bits(gb, 8);                        /*general_level_idc*/
+
+    for (i = 0; i < max_sub_layers_minus1; ++i) {
+        sub_layer_profile_present_flag[i] = get_bits1(gb);
+        sub_layer_level_present_flag[i]   = get_bits1(gb);
+    }
+
+    if (max_sub_layers_minus1 > 0)
+        for (i = max_sub_layers_minus1; i < 8; i++)
+            skip_bits(gb, 2); /*reserved_zero_2bits[i]*/
+
+    for (i = 0; i < max_sub_layers_minus1; ++i) {
+        if (sub_layer_profile_present_flag[i])
+            skip_bits(gb, PROFILE_WITHOUT_IDC_BITS); /*profile_tier_level*/
+        if (sub_layer_level_present_flag[i])
+            skip_bits(gb, 8);                        /*sub_layer_level_idc*/
+    }
+
+    /*we only need the sps_id index*/
+    sps_id = get_ue_golomb_long(gb);
+    if (sps_id >= HEVC_MAX_SPS_COUNT) {
+        av_log(ctx, AV_LOG_ERROR, "SPS id out of range: %d\n", sps_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    param_ptr = &ps->sps_list[sps_id];
+    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
+        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
+        av_log(ctx, AV_LOG_DEBUG, "Parsed SPS id: %d. Copy already exists in parameter set\n", sps_id);
+    } else {
+        AVBufferRef *sps_buf = av_buffer_allocz(nal_size);
+        if (!sps_buf)
+            return AVERROR(ENOMEM);
+
+        /*copy raw data into vps_buf buffer and replace existing copy in ps*/
+        memcpy(sps_buf->data, nal->raw_data, nal_size);
+        av_buffer_unref(&param_ptr->raw_data);
+        param_ptr->raw_data = sps_buf;
+        param_ptr->ref      = vps_ref;
+    }
+    return 0;
+}
+
+static int parse_pps(AVBSFContext *ctx, H2645NAL *nal)
+{
+    int pps_id, sps_ref;
+    Param *param_ptr;
+
+    HEVCBSFContext *s = ctx->priv_data;
+    ParamSets *ps     = &s->ps;
+
+    GetBitContext *gb = &nal->gb;
+    int nal_size      = nal->raw_size;
+
+    pps_id = get_ue_golomb_long(gb);
+    if (pps_id >= HEVC_MAX_PPS_COUNT) {
+        av_log(ctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    sps_ref = get_ue_golomb_long(gb);
+    if (sps_ref >= HEVC_MAX_SPS_COUNT) {
+        av_log(ctx, AV_LOG_ERROR, "SPS id out of range: %d\n", sps_ref);
+        return AVERROR_INVALIDDATA;
+    }
+
+    param_ptr = &ps->pps_list[pps_id];
+    if (param_ptr->raw_data && param_ptr->raw_data->size == nal_size &&
+        !memcmp(param_ptr->raw_data->data, nal->raw_data, nal_size)) {
+        av_log(ctx, AV_LOG_DEBUG, "Parsed PPS id: %d. Copy already exists in parameter set\n", pps_id);
+    } else {
+        AVBufferRef *pps_buf = av_buffer_allocz(nal_size);
+        if (!pps_buf)
+            return AVERROR(ENOMEM);
+
+        /*copy raw data into vps_buf buffer and replace existing copy in ps*/
+        memcpy(pps_buf->data, nal->raw_data, nal_size);
+        av_buffer_unref(&param_ptr->raw_data);
+        param_ptr->raw_data = pps_buf;
+        param_ptr->ref      = sps_ref;
+    }
+    return 0;
+}
+
 static int hevc_extradata_to_annexb(AVBSFContext *ctx)
 {
     GetByteContext gb;
@@ -94,10 +265,45 @@  fail:
     return ret;
 }
 
+static int update_sei(AVBufferRef **sei, H2645NAL *nal)
+{
+    AVBufferRef *tmp;
+
+    av_buffer_unref(sei);
+    tmp = av_buffer_alloc(nal->raw_size);
+    if (!tmp)
+        return AVERROR(ENOMEM);
+
+    memcpy(tmp->data, nal->raw_data, nal->raw_size);
+    *sei = tmp;
+
+    return 0;
+}
+
+static int update_paramset(AVBSFContext *ctx, H2645NAL *nal)
+{
+    int ret;
+    switch (nal->type) {
+        case (HEVC_NAL_VPS):
+            if (ret = parse_vps(ctx, nal) < 0)
+                return ret;
+            break;
+        case (HEVC_NAL_SPS):
+            if (ret = parse_sps(ctx, nal) < 0)
+                return ret;
+            break;
+        case (HEVC_NAL_PPS):
+            if (ret = parse_pps(ctx, nal) < 0)
+                return ret;
+    }
+    return 0;
+}
+
 static int hevc_mp4toannexb_init(AVBSFContext *ctx)
 {
     HEVCBSFContext *s = ctx->priv_data;
-    int ret;
+    H2645Packet pkt;
+    int i, ret = 0;
 
     if (ctx->par_in->extradata_size < MIN_HEVCC_LENGTH ||
         AV_RB24(ctx->par_in->extradata) == 1           ||
@@ -110,7 +316,114 @@  static int hevc_mp4toannexb_init(AVBSFContext *ctx)
             return ret;
         s->length_size      = ret;
         s->extradata_parsed = 1;
+
+        memset(&pkt, 0, sizeof(H2645Packet));
+        ret = ff_h2645_packet_split(&pkt, ctx->par_out->extradata, ctx->par_out->extradata_size,
+                                     ctx, 0, 0, AV_CODEC_ID_HEVC, 1, 0);
+        if (ret < 0)
+            goto done;
+
+        for (i = 0; i < pkt.nb_nals; ++i) {
+            H2645NAL *nal = &pkt.nals[i];
+
+            /*current segmentation algorithm includes next 0x00 from next nal unit*/
+            if (nal->raw_data[nal->raw_size - 1] == 0x00)
+                nal->raw_size--;
+
+            if (IS_PARAMSET(nal)) {
+                ret = update_paramset(ctx, nal);
+                if (ret < 0)
+                    goto done;
+                continue;
+            }
+
+            if (nal->type == HEVC_NAL_SEI_PREFIX) {
+                ret = update_sei(&s->ps.sei_prefix, nal);
+                if (ret < 0)
+                    goto done;
+            }
+        }
+    }
+
+done:
+    ff_h2645_packet_uninit(&pkt);
+    return ret;
+}
+
+static void ps_uninit(ParamSets* ps)
+{
+    int i;
+    for (i = 0; i < FF_ARRAY_ELEMS(ps->vps_list); i++)
+       av_buffer_unref(&ps->vps_list[i].raw_data);
+    for (i = 0; i < FF_ARRAY_ELEMS(ps->sps_list); i++)
+       av_buffer_unref(&ps->sps_list[i].raw_data);
+    for (i = 0; i < FF_ARRAY_ELEMS(ps->pps_list); i++)
+       av_buffer_unref(&ps->pps_list[i].raw_data);
+
+    av_buffer_unref(&ps->sei_prefix);
+}
+
+static void hevc_mp4toannexb_close(AVBSFContext *ctx)
+{
+    HEVCBSFContext *s = (HEVCBSFContext*)ctx->priv_data;
+    ps_uninit(&s->ps);
+}
+
+static int write_extradata(AVBSFContext *ctx, AVPacket *pkt_out, H2645NAL *nal_irap)
+{
+    int ref, ret, prev_size;
+    int new_extradata_size = 0;
+
+    HEVCBSFContext *s = (HEVCBSFContext*)ctx->priv_data;
+    ParamSets *ps     = &s->ps;
+    GetBitContext *gb = &nal_irap->gb;
+
+    /* currently active pps parameter set */
+    const Param *vps;
+    const Param *sps;
+    const Param *pps;
+
+    skip_bits1(gb); /*first_slice_ic_pic_flag*/
+    skip_bits1(gb); /*no_output_of_prior_pics_flag*/
+
+    ref = get_ue_golomb_long(gb);
+    if (ref >= HEVC_MAX_PPS_COUNT || !ps->pps_list[ref].raw_data) {
+        av_log(ctx, AV_LOG_ERROR, "Invalid PPS: %d\n", ref);
+        return AVERROR_INVALIDDATA;
+    }
+    pps                 = &ps->pps_list[ref];
+    new_extradata_size += pps->raw_data->size + 4;
+    ref                 = pps->ref;
+
+    if (ref >= HEVC_MAX_SPS_COUNT || !ps->sps_list[ref].raw_data) {
+        av_log(ctx, AV_LOG_ERROR, "Invalid SPS: %d\n", ref);
+        return AVERROR_INVALIDDATA;
     }
+    sps                 = &ps->sps_list[ref];
+    new_extradata_size += sps->raw_data->size + 4;
+    ref                 = sps->ref;
+
+    if (ref >= HEVC_MAX_VPS_COUNT || !ps->vps_list[ref].raw_data) {
+        av_log(ctx, AV_LOG_ERROR, "Invalid VPS: %d\n", ref);
+        return AVERROR_INVALIDDATA;
+    }
+    vps                 = &ps->vps_list[ref];
+    new_extradata_size += vps->raw_data->size + 4;
+
+    if (ps->sei_prefix)
+        new_extradata_size += ps->sei_prefix->size + 4;
+
+    prev_size = pkt_out->size;
+    ret = av_grow_packet(pkt_out, new_extradata_size);
+    if (ret < 0)
+        return AVERROR(ENOMEM);
+
+    WRITE_NAL(pkt_out, prev_size, vps->raw_data);
+    WRITE_NAL(pkt_out, prev_size, sps->raw_data);
+    WRITE_NAL(pkt_out, prev_size, pps->raw_data);
+
+    if (ps->sei_prefix)
+        WRITE_NAL(pkt_out, prev_size, ps->sei_prefix);
 
     return 0;
 }
@@ -119,62 +432,64 @@  static int hevc_mp4toannexb_filter(AVBSFContext *ctx, AVPacket *out)
 {
     HEVCBSFContext *s = ctx->priv_data;
     AVPacket *in;
-    GetByteContext gb;
-
-    int got_irap = 0;
-    int i, ret = 0;
+    H2645Packet pkt;
+    int i, prev_size;
+    int ret = 0;
+    int first_irap = 0;
 
     ret = ff_bsf_get_packet(ctx, &in);
     if (ret < 0)
         return ret;
 
+    /*output the annexb nalu if extradata is not parsed*/
     if (!s->extradata_parsed) {
         av_packet_move_ref(out, in);
         av_packet_free(&in);
         return 0;
     }
 
-    bytestream2_init(&gb, in->data, in->size);
+    memset(&pkt, 0, sizeof(H2645Packet));
+    ret = ff_h2645_packet_split(&pkt, in->data, in->size, ctx, 1, s->length_size, AV_CODEC_ID_HEVC, 1, 0);
+    if (ret < 0)
+        goto done;
 
-    while (bytestream2_get_bytes_left(&gb)) {
-        uint32_t nalu_size = 0;
-        int      nalu_type;
-        int is_irap, add_extradata, extra_size, prev_size;
+    for (i = 0; i < pkt.nb_nals; ++i) {
 
-        for (i = 0; i < s->length_size; i++)
-            nalu_size = (nalu_size << 8) | bytestream2_get_byte(&gb);
+        H2645NAL *nal = &pkt.nals[i];
 
-        nalu_type = (bytestream2_peek_byte(&gb) >> 1) & 0x3f;
+        if (IS_PARAMSET(nal)) {
+            ret = update_paramset(ctx, nal);
+            if (ret < 0)
+                goto done;
+            continue;
+        }
 
-        /* prepend extradata to IRAP frames */
-        is_irap       = nalu_type >= 16 && nalu_type <= 23;
-        add_extradata = is_irap && !got_irap;
-        extra_size    = add_extradata * ctx->par_out->extradata_size;
-        got_irap     |= is_irap;
+        if (nal->type == HEVC_NAL_SEI_PREFIX) {
+            ret = update_sei(&s->ps.sei_prefix, nal);
+            if (ret < 0)
+                goto done;
+            continue;
+        }
 
-        if (SIZE_MAX - nalu_size < 4 ||
-            SIZE_MAX - 4 - nalu_size < extra_size) {
-            ret = AVERROR_INVALIDDATA;
-            goto fail;
+        if (!first_irap && IS_IRAP(nal)) {
+            ret = write_extradata(ctx, out, nal);
+            if (ret < 0)
+                goto done;
+            first_irap = 1;
         }
 
+        /*write to output packet*/
         prev_size = out->size;
-
-        ret = av_grow_packet(out, 4 + nalu_size + extra_size);
-        if (ret < 0)
-            goto fail;
-
-        if (add_extradata)
-            memcpy(out->data + prev_size, ctx->par_out->extradata, extra_size);
-        AV_WB32(out->data + prev_size + extra_size, 1);
-        bytestream2_get_buffer(&gb, out->data + prev_size + 4 + extra_size, nalu_size);
+        av_grow_packet(out, nal->raw_size + 4);
+        AV_WB32(out->data + prev_size, 1);
+        prev_size += 4;
+        memcpy(out->data + prev_size, nal->raw_data, nal->raw_size);
+        prev_size += nal->raw_size;
     }
 
-    ret = av_packet_copy_props(out, in);
-    if (ret < 0)
-        goto fail;
+done:
+    ff_h2645_packet_uninit(&pkt);
 
-fail:
     if (ret < 0)
         av_packet_unref(out);
     av_packet_free(&in);
@@ -190,6 +505,7 @@  const AVBitStreamFilter ff_hevc_mp4toannexb_bsf = {
     .name           = "hevc_mp4toannexb",
     .priv_data_size = sizeof(HEVCBSFContext),
     .init           = hevc_mp4toannexb_init,
+    .close          = hevc_mp4toannexb_close,
     .filter         = hevc_mp4toannexb_filter,
     .codec_ids      = codec_ids,
 };
diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak
index 559c3898bc..4f812b0834 100644
--- a/tests/fate/hevc.mak
+++ b/tests/fate/hevc.mak
@@ -238,7 +238,7 @@  FATE_HEVC-$(call ALLYES, HEVC_DEMUXER MOV_DEMUXER HEVC_MP4TOANNEXB_BSF MOV_MUXER
 fate-hevc-bsf-mp4toannexb: tests/data/hevc-mp4.mov
 fate-hevc-bsf-mp4toannexb: CMD = md5 -i $(TARGET_PATH)/tests/data/hevc-mp4.mov -c:v copy -fflags +bitexact -f hevc
 fate-hevc-bsf-mp4toannexb: CMP = oneline
-fate-hevc-bsf-mp4toannexb: REF = 1873662a3af1848c37e4eb25722c8df9
+fate-hevc-bsf-mp4toannexb: REF = 3c9d998a3aa2b9e0fb1c1f434952bf8b
 
 fate-hevc-skiploopfilter: CMD = framemd5 -skip_loop_filter nokey -i $(TARGET_SAMPLES)/hevc-conformance/SAO_D_Samsung_5.bit -sws_flags bitexact
 FATE_HEVC += fate-hevc-skiploopfilter