diff mbox series

[FFmpeg-devel,2/2,v2] avformat/movenc: parse h264 packets to build Sync Sample and Recovery Point tables

Message ID 20210729210651.25168-2-jamrial@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/2] avformat/movenc: don't negate roll_distance when writing it
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate fail Make fate failed
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate warning Make fate failed

Commit Message

James Almer July 29, 2021, 9:06 p.m. UTC
Since we can't blindly trust the keyframe flag in packets and assume its
contents are a valid Sync Sample, do some basic bitstream parsing to build the
Sync Sample table in addition to a Random Access Recovery Point table.

Suggested-by: ffmpeg@fb.com
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavformat/movenc.c         | 142 +++++++++++++++++++++++++++++++++--
 libavformat/movenc.h         |   1 +
 tests/ref/lavf-fate/h264.mp4 |   6 +-
 3 files changed, 139 insertions(+), 10 deletions(-)

Comments

Andreas Rheinhardt July 30, 2021, 12:49 a.m. UTC | #1
James Almer:
> Since we can't blindly trust the keyframe flag in packets and assume its
> contents are a valid Sync Sample, do some basic bitstream parsing to build the
> Sync Sample table in addition to a Random Access Recovery Point table.
> 
> Suggested-by: ffmpeg@fb.com
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavformat/movenc.c         | 142 +++++++++++++++++++++++++++++++++--
>  libavformat/movenc.h         |   1 +
>  tests/ref/lavf-fate/h264.mp4 |   6 +-
>  3 files changed, 139 insertions(+), 10 deletions(-)
> 
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index fedc9c0e18..01ab2f43a6 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -34,13 +34,17 @@
>  #include "avc.h"
>  #include "libavcodec/ac3_parser_internal.h"
>  #include "libavcodec/dnxhddata.h"
> +#include "libavcodec/h264.h"
> +#include "libavcodec/h2645_parse.h"
>  #include "libavcodec/flac.h"
>  #include "libavcodec/get_bits.h"
> +#include "libavcodec/golomb.h"
>  
>  #include "libavcodec/internal.h"
>  #include "libavcodec/put_bits.h"
>  #include "libavcodec/vc1_common.h"
>  #include "libavcodec/raw.h"
> +#include "libavcodec/sei.h"
>  #include "internal.h"
>  #include "libavutil/avstring.h"
>  #include "libavutil/channel_layout.h"
> @@ -2537,7 +2541,9 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
>      if (!sgpd_entries)
>          return AVERROR(ENOMEM);
>  
> -    av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC);
> +    av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS ||
> +               track->par->codec_id == AV_CODEC_ID_AAC  ||
> +               track->par->codec_id == AV_CODEC_ID_H264);
>  
>      if (track->par->codec_id == AV_CODEC_ID_OPUS) {
>          for (i = 0; i < track->entry; i++) {
> @@ -2566,6 +2572,18 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
>                  sgpd_entries[entries].group_description_index = distance ? ++group : 0;
>              }
>          }
> +    } else if (track->par->codec_id == AV_CODEC_ID_H264) {
> +        for (i = 0; i < track->entry; i++) {
> +            int distance = track->cluster[i].roll_distance;
> +            if (i && distance == sgpd_entries[entries].roll_distance) {
> +                sgpd_entries[entries].count++;
> +            } else {
> +                entries++;
> +                sgpd_entries[entries].count = 1;
> +                sgpd_entries[entries].roll_distance = distance;
> +                sgpd_entries[entries].group_description_index = distance ? ++group : 0;
> +            }
> +        }
>      } else {
>          entries++;
>          sgpd_entries[entries].count = track->sample_count;
> @@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>      if (track->cenc.aes_ctr) {
>          ff_mov_cenc_write_stbl_atoms(&track->cenc, pb);
>      }
> -    if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC) {
> +    if (track->par->codec_id == AV_CODEC_ID_OPUS ||
> +        track->par->codec_id == AV_CODEC_ID_AAC  ||
> +        track->par->codec_id == AV_CODEC_ID_H264) {
>          mov_preroll_write_stbl_atoms(pb, track);
>      }
>      return update_size(pb, pos);
> @@ -5150,6 +5170,107 @@ static int mov_parse_mpeg2_frame(AVPacket *pkt, uint32_t *flags)
>      return 0;
>  }
>  
> +static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk, uint8_t *reformatted_data,
> +                                int size)
> +{
> +    const uint8_t *buf, *buf_end, *sei_buf = NULL;
> +    uint32_t state = -1;
> +    unsigned roll_distance = 0;
> +    int nal_length_size = 0, nalsize;
> +    int ret, idr = 0;
> +
> +    if (!size)
> +        return 0;
> +    if (trk->vos_data && trk->vos_data[0] == 1) {
> +        if (trk->vos_len < 5)
> +            return 0;
> +        nal_length_size = (trk->vos_data[4] & 0x03) + 1;
> +        buf = pkt->data;
> +        buf_end = pkt->data + size;
> +    } else {
> +        nal_length_size = 4;
> +        buf = reformatted_data;
> +        buf_end = reformatted_data + size;
> +    }
> +
> +    while (buf_end - buf >= 4) {
> +        int i = 0;

This variable can use case H264_NAL_SEI scope.

> +        nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &i, NULL);
> +        if (nalsize < 0)
> +            break;
> +        state = buf[nal_length_size];
> +        buf += nal_length_size + 1;
> +        nalsize--;
> +
> +        switch (state & 0x1f) {
> +        case H264_NAL_IDR_SLICE:
> +            idr = 1;
> +        // fall-through
> +        case H264_NAL_SLICE:
> +            goto end;

Given that you are now only parsing until you have the first VLC slice I
wonder whether it is really advantageous to reuse the already
transformed packet; after all, ff_avc_parse_nal_units_buf() has an
allocation and a memcpy more than ff_avc_parse_nal_units() which
probably outweighs any savings in this function given that SEIs are so tiny.
Maybe the helper function from my old patchset
https://patchwork.ffmpeg.org/project/ffmpeg/patch/20200709191805.21648-1-andreas.rheinhardt@gmail.com/
(which I never pushed because I feared that I was too strict) could be
helpful? (But one would probably have to add a new parameter to allow to
peek into the future (i.e. see what the next NALU will be); otherwise
one would needlessly try to find the end of the first VLC NALU.

> +        case H264_NAL_SEI: {
> +            int sei_size;
> +
> +            sei_buf = ff_nal_unit_extract_rbsp(buf, nalsize, &sei_size, 0);
> +            if (!sei_buf)
> +                goto end;
> +
> +            i = 0;
> +            do {
> +                GetBitContext gb;
> +                int payload_type = 0, payload_size = 0;
> +
> +                do {
> +                    if (sei_size - i < 1)

sei_size <= i seems more natural.

> +                         goto end;
> +                    payload_type += sei_buf[i];
> +                } while (sei_buf[i++] == 255);
> +                do {
> +                    if (sei_size - i < 1)
> +                        goto end;
> +                    payload_size += sei_buf[i];
> +                } while (sei_buf[i++] == 255);
> +
> +                if (payload_size > sei_size - i)
> +                    goto end;
> +
> +                ret = init_get_bits8(&gb, sei_buf + i, payload_size);
> +                if (ret < 0)
> +                    goto end;
> +
> +                switch (payload_type) {
> +                case SEI_TYPE_RECOVERY_POINT:
> +                    roll_distance = get_ue_golomb_long(&gb);
> +                    break;
> +                default:
> +                    break;
> +                }
> +                i += payload_size;
> +            } while (sei_size - i > 0 && sei_buf[i] != 0x80);
> +            av_freep(&sei_buf);
> +            break;
> +        }
> +        default:
> +            break;
> +        }
> +        buf += nalsize;
> +    }
> +
> +end:
> +    if (roll_distance != (int16_t)roll_distance)
> +        roll_distance = 0;
> +    trk->cluster[trk->entry].roll_distance = roll_distance;
> +
> +    if (idr) {
> +        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
> +        trk->has_keyframes++;
> +    }
> +
> +    av_freep(&sei_buf);
> +
> +    return 0;
> +}
> +
>  static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
>  {
>      const uint8_t *start, *next, *end = pkt->data + pkt->size;
> @@ -5638,13 +5759,17 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>              avio_write(pb, reformatted_data, size);
>          } else {
>              if (trk->cenc.aes_ctr) {
> -                size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size);
> -                if (size < 0) {
> -                    ret = size;
> +                ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size);
> +                if (ret < 0)
> +                    return ret;
> +                ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, 4, pb, reformatted_data, size);
> +                if (ret < 0)
>                      goto err;
> -                }
>              } else {
> -                size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size);
> +                ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size);
> +                if (ret < 0)
> +                    return ret;

You can factor the call to ff_avc_parse_nal_units_buf() out; in fact,
not only the two branches seen here do this, but the code immediately
above this does it as well: It even coincides with what is here.

> +                avio_write(pb, reformatted_data, size);
>              }
>          }
>      } else if (par->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 &&
> @@ -5740,6 +5865,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>      trk->cluster[trk->entry].entries          = samples_in_chunk;
>      trk->cluster[trk->entry].dts              = pkt->dts;
>      trk->cluster[trk->entry].pts              = pkt->pts;
> +    trk->cluster[trk->entry].roll_distance    = 0;
>      if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
>          if (!trk->frag_discont) {
>              /* First packet of a new fragment. We already wrote the duration
> @@ -5821,6 +5947,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>  
>      if (par->codec_id == AV_CODEC_ID_VC1) {
>          mov_parse_vc1_frame(pkt, trk);
> +    } else if (par->codec_id == AV_CODEC_ID_H264 && !TAG_IS_AVCI(trk->tag)) {
> +        mov_parse_h264_frame(pkt, trk, reformatted_data, size);
>      } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
>          mov_parse_truehd_frame(pkt, trk);
>      } else if (pkt->flags & AV_PKT_FLAG_KEY) {
> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> index af1ea0bce6..73bf73ce8f 100644
> --- a/libavformat/movenc.h
> +++ b/libavformat/movenc.h
> @@ -56,6 +56,7 @@ typedef struct MOVIentry {
>  #define MOV_PARTIAL_SYNC_SAMPLE 0x0002
>  #define MOV_DISPOSABLE_SAMPLE   0x0004
>      uint32_t     flags;
> +    int16_t      roll_distance;
>      AVProducerReferenceTime prft;
>  } MOVIentry;
>  
> diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4
> index a9c3823c2c..c08ee4c7ae 100644
> --- a/tests/ref/lavf-fate/h264.mp4
> +++ b/tests/ref/lavf-fate/h264.mp4
> @@ -1,3 +1,3 @@
> -fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4
> -547928 tests/data/lavf-fate/lavf.h264.mp4
> -tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999
> +2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4
> +548084 tests/data/lavf-fate/lavf.h264.mp4
> +tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829
>
James Almer July 31, 2021, 3:01 a.m. UTC | #2
On 7/29/2021 9:49 PM, Andreas Rheinhardt wrote:
> James Almer:
>> Since we can't blindly trust the keyframe flag in packets and assume its
>> contents are a valid Sync Sample, do some basic bitstream parsing to build the
>> Sync Sample table in addition to a Random Access Recovery Point table.
>>
>> Suggested-by: ffmpeg@fb.com
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>   libavformat/movenc.c         | 142 +++++++++++++++++++++++++++++++++--
>>   libavformat/movenc.h         |   1 +
>>   tests/ref/lavf-fate/h264.mp4 |   6 +-
>>   3 files changed, 139 insertions(+), 10 deletions(-)
>>
>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>> index fedc9c0e18..01ab2f43a6 100644
>> --- a/libavformat/movenc.c
>> +++ b/libavformat/movenc.c
>> @@ -34,13 +34,17 @@
>>   #include "avc.h"
>>   #include "libavcodec/ac3_parser_internal.h"
>>   #include "libavcodec/dnxhddata.h"
>> +#include "libavcodec/h264.h"
>> +#include "libavcodec/h2645_parse.h"
>>   #include "libavcodec/flac.h"
>>   #include "libavcodec/get_bits.h"
>> +#include "libavcodec/golomb.h"
>>   
>>   #include "libavcodec/internal.h"
>>   #include "libavcodec/put_bits.h"
>>   #include "libavcodec/vc1_common.h"
>>   #include "libavcodec/raw.h"
>> +#include "libavcodec/sei.h"
>>   #include "internal.h"
>>   #include "libavutil/avstring.h"
>>   #include "libavutil/channel_layout.h"
>> @@ -2537,7 +2541,9 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
>>       if (!sgpd_entries)
>>           return AVERROR(ENOMEM);
>>   
>> -    av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC);
>> +    av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS ||
>> +               track->par->codec_id == AV_CODEC_ID_AAC  ||
>> +               track->par->codec_id == AV_CODEC_ID_H264);
>>   
>>       if (track->par->codec_id == AV_CODEC_ID_OPUS) {
>>           for (i = 0; i < track->entry; i++) {
>> @@ -2566,6 +2572,18 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
>>                   sgpd_entries[entries].group_description_index = distance ? ++group : 0;
>>               }
>>           }
>> +    } else if (track->par->codec_id == AV_CODEC_ID_H264) {
>> +        for (i = 0; i < track->entry; i++) {
>> +            int distance = track->cluster[i].roll_distance;
>> +            if (i && distance == sgpd_entries[entries].roll_distance) {
>> +                sgpd_entries[entries].count++;
>> +            } else {
>> +                entries++;
>> +                sgpd_entries[entries].count = 1;
>> +                sgpd_entries[entries].roll_distance = distance;
>> +                sgpd_entries[entries].group_description_index = distance ? ++group : 0;
>> +            }
>> +        }
>>       } else {
>>           entries++;
>>           sgpd_entries[entries].count = track->sample_count;
>> @@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>>       if (track->cenc.aes_ctr) {
>>           ff_mov_cenc_write_stbl_atoms(&track->cenc, pb);
>>       }
>> -    if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC) {
>> +    if (track->par->codec_id == AV_CODEC_ID_OPUS ||
>> +        track->par->codec_id == AV_CODEC_ID_AAC  ||
>> +        track->par->codec_id == AV_CODEC_ID_H264) {
>>           mov_preroll_write_stbl_atoms(pb, track);
>>       }
>>       return update_size(pb, pos);
>> @@ -5150,6 +5170,107 @@ static int mov_parse_mpeg2_frame(AVPacket *pkt, uint32_t *flags)
>>       return 0;
>>   }
>>   
>> +static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk, uint8_t *reformatted_data,
>> +                                int size)
>> +{
>> +    const uint8_t *buf, *buf_end, *sei_buf = NULL;
>> +    uint32_t state = -1;
>> +    unsigned roll_distance = 0;
>> +    int nal_length_size = 0, nalsize;
>> +    int ret, idr = 0;
>> +
>> +    if (!size)
>> +        return 0;
>> +    if (trk->vos_data && trk->vos_data[0] == 1) {
>> +        if (trk->vos_len < 5)
>> +            return 0;
>> +        nal_length_size = (trk->vos_data[4] & 0x03) + 1;
>> +        buf = pkt->data;
>> +        buf_end = pkt->data + size;
>> +    } else {
>> +        nal_length_size = 4;
>> +        buf = reformatted_data;
>> +        buf_end = reformatted_data + size;
>> +    }
>> +
>> +    while (buf_end - buf >= 4) {
>> +        int i = 0;
> 
> This variable can use case H264_NAL_SEI scope.

It's used by get_nalsize() in the following line.

> 
>> +        nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &i, NULL);
>> +        if (nalsize < 0)
>> +            break;
>> +        state = buf[nal_length_size];
>> +        buf += nal_length_size + 1;
>> +        nalsize--;
>> +
>> +        switch (state & 0x1f) {
>> +        case H264_NAL_IDR_SLICE:
>> +            idr = 1;
>> +        // fall-through
>> +        case H264_NAL_SLICE:
>> +            goto end;
> 
> Given that you are now only parsing until you have the first VLC slice I
> wonder whether it is really advantageous to reuse the already
> transformed packet; after all, ff_avc_parse_nal_units_buf() has an
> allocation and a memcpy more than ff_avc_parse_nal_units() which
> probably outweighs any savings in this function given that SEIs are so tiny.
> Maybe the helper function from my old patchset
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/20200709191805.21648-1-andreas.rheinhardt@gmail.com/
> (which I never pushed because I feared that I was too strict) could be
> helpful? (But one would probably have to add a new parameter to allow to
> peek into the future (i.e. see what the next NALU will be); otherwise
> one would needlessly try to find the end of the first VLC NALU.

Feel free to optimize it if you commit that function or a similar one.

> 
>> +        case H264_NAL_SEI: {
>> +            int sei_size;
>> +
>> +            sei_buf = ff_nal_unit_extract_rbsp(buf, nalsize, &sei_size, 0);
>> +            if (!sei_buf)
>> +                goto end;
>> +
>> +            i = 0;
>> +            do {
>> +                GetBitContext gb;
>> +                int payload_type = 0, payload_size = 0;
>> +
>> +                do {
>> +                    if (sei_size - i < 1)
> 
> sei_size <= i seems more natural.

Will change.

> 
>> +                         goto end;
>> +                    payload_type += sei_buf[i];
>> +                } while (sei_buf[i++] == 255);
>> +                do {
>> +                    if (sei_size - i < 1)
>> +                        goto end;
>> +                    payload_size += sei_buf[i];
>> +                } while (sei_buf[i++] == 255);
>> +
>> +                if (payload_size > sei_size - i)
>> +                    goto end;
>> +
>> +                ret = init_get_bits8(&gb, sei_buf + i, payload_size);
>> +                if (ret < 0)
>> +                    goto end;
>> +
>> +                switch (payload_type) {
>> +                case SEI_TYPE_RECOVERY_POINT:
>> +                    roll_distance = get_ue_golomb_long(&gb);
>> +                    break;
>> +                default:
>> +                    break;
>> +                }
>> +                i += payload_size;
>> +            } while (sei_size - i > 0 && sei_buf[i] != 0x80);
>> +            av_freep(&sei_buf);
>> +            break;
>> +        }
>> +        default:
>> +            break;
>> +        }
>> +        buf += nalsize;
>> +    }
>> +
>> +end:
>> +    if (roll_distance != (int16_t)roll_distance)
>> +        roll_distance = 0;
>> +    trk->cluster[trk->entry].roll_distance = roll_distance;
>> +
>> +    if (idr) {
>> +        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
>> +        trk->has_keyframes++;
>> +    }
>> +
>> +    av_freep(&sei_buf);
>> +
>> +    return 0;
>> +}
>> +
>>   static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
>>   {
>>       const uint8_t *start, *next, *end = pkt->data + pkt->size;
>> @@ -5638,13 +5759,17 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>>               avio_write(pb, reformatted_data, size);
>>           } else {
>>               if (trk->cenc.aes_ctr) {
>> -                size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size);
>> -                if (size < 0) {
>> -                    ret = size;
>> +                ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size);
>> +                if (ret < 0)
>> +                    return ret;
>> +                ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, 4, pb, reformatted_data, size);
>> +                if (ret < 0)
>>                       goto err;
>> -                }
>>               } else {
>> -                size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size);
>> +                ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size);
>> +                if (ret < 0)
>> +                    return ret;
> 
> You can factor the call to ff_avc_parse_nal_units_buf() out; in fact,
> not only the two branches seen here do this, but the code immediately
> above this does it as well: It even coincides with what is here.

Ah, good catch, will merge them.

> 
>> +                avio_write(pb, reformatted_data, size);
>>               }
>>           }
>>       } else if (par->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 &&
>> @@ -5740,6 +5865,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>>       trk->cluster[trk->entry].entries          = samples_in_chunk;
>>       trk->cluster[trk->entry].dts              = pkt->dts;
>>       trk->cluster[trk->entry].pts              = pkt->pts;
>> +    trk->cluster[trk->entry].roll_distance    = 0;
>>       if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
>>           if (!trk->frag_discont) {
>>               /* First packet of a new fragment. We already wrote the duration
>> @@ -5821,6 +5947,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>>   
>>       if (par->codec_id == AV_CODEC_ID_VC1) {
>>           mov_parse_vc1_frame(pkt, trk);
>> +    } else if (par->codec_id == AV_CODEC_ID_H264 && !TAG_IS_AVCI(trk->tag)) {
>> +        mov_parse_h264_frame(pkt, trk, reformatted_data, size);
>>       } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
>>           mov_parse_truehd_frame(pkt, trk);
>>       } else if (pkt->flags & AV_PKT_FLAG_KEY) {
>> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
>> index af1ea0bce6..73bf73ce8f 100644
>> --- a/libavformat/movenc.h
>> +++ b/libavformat/movenc.h
>> @@ -56,6 +56,7 @@ typedef struct MOVIentry {
>>   #define MOV_PARTIAL_SYNC_SAMPLE 0x0002
>>   #define MOV_DISPOSABLE_SAMPLE   0x0004
>>       uint32_t     flags;
>> +    int16_t      roll_distance;
>>       AVProducerReferenceTime prft;
>>   } MOVIentry;
>>   
>> diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4
>> index a9c3823c2c..c08ee4c7ae 100644
>> --- a/tests/ref/lavf-fate/h264.mp4
>> +++ b/tests/ref/lavf-fate/h264.mp4
>> @@ -1,3 +1,3 @@
>> -fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4
>> -547928 tests/data/lavf-fate/lavf.h264.mp4
>> -tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999
>> +2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4
>> +548084 tests/data/lavf-fate/lavf.h264.mp4
>> +tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829
>>
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index fedc9c0e18..01ab2f43a6 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -34,13 +34,17 @@ 
 #include "avc.h"
 #include "libavcodec/ac3_parser_internal.h"
 #include "libavcodec/dnxhddata.h"
+#include "libavcodec/h264.h"
+#include "libavcodec/h2645_parse.h"
 #include "libavcodec/flac.h"
 #include "libavcodec/get_bits.h"
+#include "libavcodec/golomb.h"
 
 #include "libavcodec/internal.h"
 #include "libavcodec/put_bits.h"
 #include "libavcodec/vc1_common.h"
 #include "libavcodec/raw.h"
+#include "libavcodec/sei.h"
 #include "internal.h"
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
@@ -2537,7 +2541,9 @@  static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
     if (!sgpd_entries)
         return AVERROR(ENOMEM);
 
-    av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC);
+    av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS ||
+               track->par->codec_id == AV_CODEC_ID_AAC  ||
+               track->par->codec_id == AV_CODEC_ID_H264);
 
     if (track->par->codec_id == AV_CODEC_ID_OPUS) {
         for (i = 0; i < track->entry; i++) {
@@ -2566,6 +2572,18 @@  static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
                 sgpd_entries[entries].group_description_index = distance ? ++group : 0;
             }
         }
+    } else if (track->par->codec_id == AV_CODEC_ID_H264) {
+        for (i = 0; i < track->entry; i++) {
+            int distance = track->cluster[i].roll_distance;
+            if (i && distance == sgpd_entries[entries].roll_distance) {
+                sgpd_entries[entries].count++;
+            } else {
+                entries++;
+                sgpd_entries[entries].count = 1;
+                sgpd_entries[entries].roll_distance = distance;
+                sgpd_entries[entries].group_description_index = distance ? ++group : 0;
+            }
+        }
     } else {
         entries++;
         sgpd_entries[entries].count = track->sample_count;
@@ -2639,7 +2657,9 @@  static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
     if (track->cenc.aes_ctr) {
         ff_mov_cenc_write_stbl_atoms(&track->cenc, pb);
     }
-    if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC) {
+    if (track->par->codec_id == AV_CODEC_ID_OPUS ||
+        track->par->codec_id == AV_CODEC_ID_AAC  ||
+        track->par->codec_id == AV_CODEC_ID_H264) {
         mov_preroll_write_stbl_atoms(pb, track);
     }
     return update_size(pb, pos);
@@ -5150,6 +5170,107 @@  static int mov_parse_mpeg2_frame(AVPacket *pkt, uint32_t *flags)
     return 0;
 }
 
+static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk, uint8_t *reformatted_data,
+                                int size)
+{
+    const uint8_t *buf, *buf_end, *sei_buf = NULL;
+    uint32_t state = -1;
+    unsigned roll_distance = 0;
+    int nal_length_size = 0, nalsize;
+    int ret, idr = 0;
+
+    if (!size)
+        return 0;
+    if (trk->vos_data && trk->vos_data[0] == 1) {
+        if (trk->vos_len < 5)
+            return 0;
+        nal_length_size = (trk->vos_data[4] & 0x03) + 1;
+        buf = pkt->data;
+        buf_end = pkt->data + size;
+    } else {
+        nal_length_size = 4;
+        buf = reformatted_data;
+        buf_end = reformatted_data + size;
+    }
+
+    while (buf_end - buf >= 4) {
+        int i = 0;
+        nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &i, NULL);
+        if (nalsize < 0)
+            break;
+        state = buf[nal_length_size];
+        buf += nal_length_size + 1;
+        nalsize--;
+
+        switch (state & 0x1f) {
+        case H264_NAL_IDR_SLICE:
+            idr = 1;
+        // fall-through
+        case H264_NAL_SLICE:
+            goto end;
+        case H264_NAL_SEI: {
+            int sei_size;
+
+            sei_buf = ff_nal_unit_extract_rbsp(buf, nalsize, &sei_size, 0);
+            if (!sei_buf)
+                goto end;
+
+            i = 0;
+            do {
+                GetBitContext gb;
+                int payload_type = 0, payload_size = 0;
+
+                do {
+                    if (sei_size - i < 1)
+                         goto end;
+                    payload_type += sei_buf[i];
+                } while (sei_buf[i++] == 255);
+                do {
+                    if (sei_size - i < 1)
+                        goto end;
+                    payload_size += sei_buf[i];
+                } while (sei_buf[i++] == 255);
+
+                if (payload_size > sei_size - i)
+                    goto end;
+
+                ret = init_get_bits8(&gb, sei_buf + i, payload_size);
+                if (ret < 0)
+                    goto end;
+
+                switch (payload_type) {
+                case SEI_TYPE_RECOVERY_POINT:
+                    roll_distance = get_ue_golomb_long(&gb);
+                    break;
+                default:
+                    break;
+                }
+                i += payload_size;
+            } while (sei_size - i > 0 && sei_buf[i] != 0x80);
+            av_freep(&sei_buf);
+            break;
+        }
+        default:
+            break;
+        }
+        buf += nalsize;
+    }
+
+end:
+    if (roll_distance != (int16_t)roll_distance)
+        roll_distance = 0;
+    trk->cluster[trk->entry].roll_distance = roll_distance;
+
+    if (idr) {
+        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
+        trk->has_keyframes++;
+    }
+
+    av_freep(&sei_buf);
+
+    return 0;
+}
+
 static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
 {
     const uint8_t *start, *next, *end = pkt->data + pkt->size;
@@ -5638,13 +5759,17 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             avio_write(pb, reformatted_data, size);
         } else {
             if (trk->cenc.aes_ctr) {
-                size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size);
-                if (size < 0) {
-                    ret = size;
+                ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size);
+                if (ret < 0)
+                    return ret;
+                ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, 4, pb, reformatted_data, size);
+                if (ret < 0)
                     goto err;
-                }
             } else {
-                size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size);
+                ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size);
+                if (ret < 0)
+                    return ret;
+                avio_write(pb, reformatted_data, size);
             }
         }
     } else if (par->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 &&
@@ -5740,6 +5865,7 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
     trk->cluster[trk->entry].entries          = samples_in_chunk;
     trk->cluster[trk->entry].dts              = pkt->dts;
     trk->cluster[trk->entry].pts              = pkt->pts;
+    trk->cluster[trk->entry].roll_distance    = 0;
     if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
         if (!trk->frag_discont) {
             /* First packet of a new fragment. We already wrote the duration
@@ -5821,6 +5947,8 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     if (par->codec_id == AV_CODEC_ID_VC1) {
         mov_parse_vc1_frame(pkt, trk);
+    } else if (par->codec_id == AV_CODEC_ID_H264 && !TAG_IS_AVCI(trk->tag)) {
+        mov_parse_h264_frame(pkt, trk, reformatted_data, size);
     } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
         mov_parse_truehd_frame(pkt, trk);
     } else if (pkt->flags & AV_PKT_FLAG_KEY) {
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index af1ea0bce6..73bf73ce8f 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -56,6 +56,7 @@  typedef struct MOVIentry {
 #define MOV_PARTIAL_SYNC_SAMPLE 0x0002
 #define MOV_DISPOSABLE_SAMPLE   0x0004
     uint32_t     flags;
+    int16_t      roll_distance;
     AVProducerReferenceTime prft;
 } MOVIentry;
 
diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4
index a9c3823c2c..c08ee4c7ae 100644
--- a/tests/ref/lavf-fate/h264.mp4
+++ b/tests/ref/lavf-fate/h264.mp4
@@ -1,3 +1,3 @@ 
-fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4
-547928 tests/data/lavf-fate/lavf.h264.mp4
-tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999
+2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4
+548084 tests/data/lavf-fate/lavf.h264.mp4
+tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829