Message ID | 20210729210651.25168-2-jamrial@gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/2] avformat/movenc: don't negate roll_distance when writing it | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | fail | Make fate failed |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | warning | Make fate failed |
James Almer: > Since we can't blindly trust the keyframe flag in packets and assume its > contents are a valid Sync Sample, do some basic bitstream parsing to build the > Sync Sample table in addition to a Random Access Recovery Point table. > > Suggested-by: ffmpeg@fb.com > Signed-off-by: James Almer <jamrial@gmail.com> > --- > libavformat/movenc.c | 142 +++++++++++++++++++++++++++++++++-- > libavformat/movenc.h | 1 + > tests/ref/lavf-fate/h264.mp4 | 6 +- > 3 files changed, 139 insertions(+), 10 deletions(-) > > diff --git a/libavformat/movenc.c b/libavformat/movenc.c > index fedc9c0e18..01ab2f43a6 100644 > --- a/libavformat/movenc.c > +++ b/libavformat/movenc.c > @@ -34,13 +34,17 @@ > #include "avc.h" > #include "libavcodec/ac3_parser_internal.h" > #include "libavcodec/dnxhddata.h" > +#include "libavcodec/h264.h" > +#include "libavcodec/h2645_parse.h" > #include "libavcodec/flac.h" > #include "libavcodec/get_bits.h" > +#include "libavcodec/golomb.h" > > #include "libavcodec/internal.h" > #include "libavcodec/put_bits.h" > #include "libavcodec/vc1_common.h" > #include "libavcodec/raw.h" > +#include "libavcodec/sei.h" > #include "internal.h" > #include "libavutil/avstring.h" > #include "libavutil/channel_layout.h" > @@ -2537,7 +2541,9 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) > if (!sgpd_entries) > return AVERROR(ENOMEM); > > - av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC); > + av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || > + track->par->codec_id == AV_CODEC_ID_AAC || > + track->par->codec_id == AV_CODEC_ID_H264); > > if (track->par->codec_id == AV_CODEC_ID_OPUS) { > for (i = 0; i < track->entry; i++) { > @@ -2566,6 +2572,18 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) > sgpd_entries[entries].group_description_index = distance ? ++group : 0; > } > } > + } else if (track->par->codec_id == AV_CODEC_ID_H264) { > + for (i = 0; i < track->entry; i++) { > + int distance = track->cluster[i].roll_distance; > + if (i && distance == sgpd_entries[entries].roll_distance) { > + sgpd_entries[entries].count++; > + } else { > + entries++; > + sgpd_entries[entries].count = 1; > + sgpd_entries[entries].roll_distance = distance; > + sgpd_entries[entries].group_description_index = distance ? ++group : 0; > + } > + } > } else { > entries++; > sgpd_entries[entries].count = track->sample_count; > @@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext > if (track->cenc.aes_ctr) { > ff_mov_cenc_write_stbl_atoms(&track->cenc, pb); > } > - if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC) { > + if (track->par->codec_id == AV_CODEC_ID_OPUS || > + track->par->codec_id == AV_CODEC_ID_AAC || > + track->par->codec_id == AV_CODEC_ID_H264) { > mov_preroll_write_stbl_atoms(pb, track); > } > return update_size(pb, pos); > @@ -5150,6 +5170,107 @@ static int mov_parse_mpeg2_frame(AVPacket *pkt, uint32_t *flags) > return 0; > } > > +static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk, uint8_t *reformatted_data, > + int size) > +{ > + const uint8_t *buf, *buf_end, *sei_buf = NULL; > + uint32_t state = -1; > + unsigned roll_distance = 0; > + int nal_length_size = 0, nalsize; > + int ret, idr = 0; > + > + if (!size) > + return 0; > + if (trk->vos_data && trk->vos_data[0] == 1) { > + if (trk->vos_len < 5) > + return 0; > + nal_length_size = (trk->vos_data[4] & 0x03) + 1; > + buf = pkt->data; > + buf_end = pkt->data + size; > + } else { > + nal_length_size = 4; > + buf = reformatted_data; > + buf_end = reformatted_data + size; > + } > + > + while (buf_end - buf >= 4) { > + int i = 0; This variable can use case H264_NAL_SEI scope. > + nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &i, NULL); > + if (nalsize < 0) > + break; > + state = buf[nal_length_size]; > + buf += nal_length_size + 1; > + nalsize--; > + > + switch (state & 0x1f) { > + case H264_NAL_IDR_SLICE: > + idr = 1; > + // fall-through > + case H264_NAL_SLICE: > + goto end; Given that you are now only parsing until you have the first VLC slice I wonder whether it is really advantageous to reuse the already transformed packet; after all, ff_avc_parse_nal_units_buf() has an allocation and a memcpy more than ff_avc_parse_nal_units() which probably outweighs any savings in this function given that SEIs are so tiny. Maybe the helper function from my old patchset https://patchwork.ffmpeg.org/project/ffmpeg/patch/20200709191805.21648-1-andreas.rheinhardt@gmail.com/ (which I never pushed because I feared that I was too strict) could be helpful? (But one would probably have to add a new parameter to allow to peek into the future (i.e. see what the next NALU will be); otherwise one would needlessly try to find the end of the first VLC NALU. > + case H264_NAL_SEI: { > + int sei_size; > + > + sei_buf = ff_nal_unit_extract_rbsp(buf, nalsize, &sei_size, 0); > + if (!sei_buf) > + goto end; > + > + i = 0; > + do { > + GetBitContext gb; > + int payload_type = 0, payload_size = 0; > + > + do { > + if (sei_size - i < 1) sei_size <= i seems more natural. > + goto end; > + payload_type += sei_buf[i]; > + } while (sei_buf[i++] == 255); > + do { > + if (sei_size - i < 1) > + goto end; > + payload_size += sei_buf[i]; > + } while (sei_buf[i++] == 255); > + > + if (payload_size > sei_size - i) > + goto end; > + > + ret = init_get_bits8(&gb, sei_buf + i, payload_size); > + if (ret < 0) > + goto end; > + > + switch (payload_type) { > + case SEI_TYPE_RECOVERY_POINT: > + roll_distance = get_ue_golomb_long(&gb); > + break; > + default: > + break; > + } > + i += payload_size; > + } while (sei_size - i > 0 && sei_buf[i] != 0x80); > + av_freep(&sei_buf); > + break; > + } > + default: > + break; > + } > + buf += nalsize; > + } > + > +end: > + if (roll_distance != (int16_t)roll_distance) > + roll_distance = 0; > + trk->cluster[trk->entry].roll_distance = roll_distance; > + > + if (idr) { > + trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE; > + trk->has_keyframes++; > + } > + > + av_freep(&sei_buf); > + > + return 0; > +} > + > static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk) > { > const uint8_t *start, *next, *end = pkt->data + pkt->size; > @@ -5638,13 +5759,17 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) > avio_write(pb, reformatted_data, size); > } else { > if (trk->cenc.aes_ctr) { > - size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size); > - if (size < 0) { > - ret = size; > + ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size); > + if (ret < 0) > + return ret; > + ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, 4, pb, reformatted_data, size); > + if (ret < 0) > goto err; > - } > } else { > - size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size); > + ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size); > + if (ret < 0) > + return ret; You can factor the call to ff_avc_parse_nal_units_buf() out; in fact, not only the two branches seen here do this, but the code immediately above this does it as well: It even coincides with what is here. > + avio_write(pb, reformatted_data, size); > } > } > } else if (par->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 && > @@ -5740,6 +5865,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) > trk->cluster[trk->entry].entries = samples_in_chunk; > trk->cluster[trk->entry].dts = pkt->dts; > trk->cluster[trk->entry].pts = pkt->pts; > + trk->cluster[trk->entry].roll_distance = 0; > if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) { > if (!trk->frag_discont) { > /* First packet of a new fragment. We already wrote the duration > @@ -5821,6 +5947,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) > > if (par->codec_id == AV_CODEC_ID_VC1) { > mov_parse_vc1_frame(pkt, trk); > + } else if (par->codec_id == AV_CODEC_ID_H264 && !TAG_IS_AVCI(trk->tag)) { > + mov_parse_h264_frame(pkt, trk, reformatted_data, size); > } else if (par->codec_id == AV_CODEC_ID_TRUEHD) { > mov_parse_truehd_frame(pkt, trk); > } else if (pkt->flags & AV_PKT_FLAG_KEY) { > diff --git a/libavformat/movenc.h b/libavformat/movenc.h > index af1ea0bce6..73bf73ce8f 100644 > --- a/libavformat/movenc.h > +++ b/libavformat/movenc.h > @@ -56,6 +56,7 @@ typedef struct MOVIentry { > #define MOV_PARTIAL_SYNC_SAMPLE 0x0002 > #define MOV_DISPOSABLE_SAMPLE 0x0004 > uint32_t flags; > + int16_t roll_distance; > AVProducerReferenceTime prft; > } MOVIentry; > > diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4 > index a9c3823c2c..c08ee4c7ae 100644 > --- a/tests/ref/lavf-fate/h264.mp4 > +++ b/tests/ref/lavf-fate/h264.mp4 > @@ -1,3 +1,3 @@ > -fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4 > -547928 tests/data/lavf-fate/lavf.h264.mp4 > -tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999 > +2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4 > +548084 tests/data/lavf-fate/lavf.h264.mp4 > +tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829 >
On 7/29/2021 9:49 PM, Andreas Rheinhardt wrote: > James Almer: >> Since we can't blindly trust the keyframe flag in packets and assume its >> contents are a valid Sync Sample, do some basic bitstream parsing to build the >> Sync Sample table in addition to a Random Access Recovery Point table. >> >> Suggested-by: ffmpeg@fb.com >> Signed-off-by: James Almer <jamrial@gmail.com> >> --- >> libavformat/movenc.c | 142 +++++++++++++++++++++++++++++++++-- >> libavformat/movenc.h | 1 + >> tests/ref/lavf-fate/h264.mp4 | 6 +- >> 3 files changed, 139 insertions(+), 10 deletions(-) >> >> diff --git a/libavformat/movenc.c b/libavformat/movenc.c >> index fedc9c0e18..01ab2f43a6 100644 >> --- a/libavformat/movenc.c >> +++ b/libavformat/movenc.c >> @@ -34,13 +34,17 @@ >> #include "avc.h" >> #include "libavcodec/ac3_parser_internal.h" >> #include "libavcodec/dnxhddata.h" >> +#include "libavcodec/h264.h" >> +#include "libavcodec/h2645_parse.h" >> #include "libavcodec/flac.h" >> #include "libavcodec/get_bits.h" >> +#include "libavcodec/golomb.h" >> >> #include "libavcodec/internal.h" >> #include "libavcodec/put_bits.h" >> #include "libavcodec/vc1_common.h" >> #include "libavcodec/raw.h" >> +#include "libavcodec/sei.h" >> #include "internal.h" >> #include "libavutil/avstring.h" >> #include "libavutil/channel_layout.h" >> @@ -2537,7 +2541,9 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) >> if (!sgpd_entries) >> return AVERROR(ENOMEM); >> >> - av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC); >> + av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || >> + track->par->codec_id == AV_CODEC_ID_AAC || >> + track->par->codec_id == AV_CODEC_ID_H264); >> >> if (track->par->codec_id == AV_CODEC_ID_OPUS) { >> for (i = 0; i < track->entry; i++) { >> @@ -2566,6 +2572,18 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) >> sgpd_entries[entries].group_description_index = distance ? ++group : 0; >> } >> } >> + } else if (track->par->codec_id == AV_CODEC_ID_H264) { >> + for (i = 0; i < track->entry; i++) { >> + int distance = track->cluster[i].roll_distance; >> + if (i && distance == sgpd_entries[entries].roll_distance) { >> + sgpd_entries[entries].count++; >> + } else { >> + entries++; >> + sgpd_entries[entries].count = 1; >> + sgpd_entries[entries].roll_distance = distance; >> + sgpd_entries[entries].group_description_index = distance ? ++group : 0; >> + } >> + } >> } else { >> entries++; >> sgpd_entries[entries].count = track->sample_count; >> @@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext >> if (track->cenc.aes_ctr) { >> ff_mov_cenc_write_stbl_atoms(&track->cenc, pb); >> } >> - if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC) { >> + if (track->par->codec_id == AV_CODEC_ID_OPUS || >> + track->par->codec_id == AV_CODEC_ID_AAC || >> + track->par->codec_id == AV_CODEC_ID_H264) { >> mov_preroll_write_stbl_atoms(pb, track); >> } >> return update_size(pb, pos); >> @@ -5150,6 +5170,107 @@ static int mov_parse_mpeg2_frame(AVPacket *pkt, uint32_t *flags) >> return 0; >> } >> >> +static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk, uint8_t *reformatted_data, >> + int size) >> +{ >> + const uint8_t *buf, *buf_end, *sei_buf = NULL; >> + uint32_t state = -1; >> + unsigned roll_distance = 0; >> + int nal_length_size = 0, nalsize; >> + int ret, idr = 0; >> + >> + if (!size) >> + return 0; >> + if (trk->vos_data && trk->vos_data[0] == 1) { >> + if (trk->vos_len < 5) >> + return 0; >> + nal_length_size = (trk->vos_data[4] & 0x03) + 1; >> + buf = pkt->data; >> + buf_end = pkt->data + size; >> + } else { >> + nal_length_size = 4; >> + buf = reformatted_data; >> + buf_end = reformatted_data + size; >> + } >> + >> + while (buf_end - buf >= 4) { >> + int i = 0; > > This variable can use case H264_NAL_SEI scope. It's used by get_nalsize() in the following line. > >> + nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &i, NULL); >> + if (nalsize < 0) >> + break; >> + state = buf[nal_length_size]; >> + buf += nal_length_size + 1; >> + nalsize--; >> + >> + switch (state & 0x1f) { >> + case H264_NAL_IDR_SLICE: >> + idr = 1; >> + // fall-through >> + case H264_NAL_SLICE: >> + goto end; > > Given that you are now only parsing until you have the first VLC slice I > wonder whether it is really advantageous to reuse the already > transformed packet; after all, ff_avc_parse_nal_units_buf() has an > allocation and a memcpy more than ff_avc_parse_nal_units() which > probably outweighs any savings in this function given that SEIs are so tiny. > Maybe the helper function from my old patchset > https://patchwork.ffmpeg.org/project/ffmpeg/patch/20200709191805.21648-1-andreas.rheinhardt@gmail.com/ > (which I never pushed because I feared that I was too strict) could be > helpful? (But one would probably have to add a new parameter to allow to > peek into the future (i.e. see what the next NALU will be); otherwise > one would needlessly try to find the end of the first VLC NALU. Feel free to optimize it if you commit that function or a similar one. > >> + case H264_NAL_SEI: { >> + int sei_size; >> + >> + sei_buf = ff_nal_unit_extract_rbsp(buf, nalsize, &sei_size, 0); >> + if (!sei_buf) >> + goto end; >> + >> + i = 0; >> + do { >> + GetBitContext gb; >> + int payload_type = 0, payload_size = 0; >> + >> + do { >> + if (sei_size - i < 1) > > sei_size <= i seems more natural. Will change. > >> + goto end; >> + payload_type += sei_buf[i]; >> + } while (sei_buf[i++] == 255); >> + do { >> + if (sei_size - i < 1) >> + goto end; >> + payload_size += sei_buf[i]; >> + } while (sei_buf[i++] == 255); >> + >> + if (payload_size > sei_size - i) >> + goto end; >> + >> + ret = init_get_bits8(&gb, sei_buf + i, payload_size); >> + if (ret < 0) >> + goto end; >> + >> + switch (payload_type) { >> + case SEI_TYPE_RECOVERY_POINT: >> + roll_distance = get_ue_golomb_long(&gb); >> + break; >> + default: >> + break; >> + } >> + i += payload_size; >> + } while (sei_size - i > 0 && sei_buf[i] != 0x80); >> + av_freep(&sei_buf); >> + break; >> + } >> + default: >> + break; >> + } >> + buf += nalsize; >> + } >> + >> +end: >> + if (roll_distance != (int16_t)roll_distance) >> + roll_distance = 0; >> + trk->cluster[trk->entry].roll_distance = roll_distance; >> + >> + if (idr) { >> + trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE; >> + trk->has_keyframes++; >> + } >> + >> + av_freep(&sei_buf); >> + >> + return 0; >> +} >> + >> static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk) >> { >> const uint8_t *start, *next, *end = pkt->data + pkt->size; >> @@ -5638,13 +5759,17 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) >> avio_write(pb, reformatted_data, size); >> } else { >> if (trk->cenc.aes_ctr) { >> - size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size); >> - if (size < 0) { >> - ret = size; >> + ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size); >> + if (ret < 0) >> + return ret; >> + ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, 4, pb, reformatted_data, size); >> + if (ret < 0) >> goto err; >> - } >> } else { >> - size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size); >> + ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size); >> + if (ret < 0) >> + return ret; > > You can factor the call to ff_avc_parse_nal_units_buf() out; in fact, > not only the two branches seen here do this, but the code immediately > above this does it as well: It even coincides with what is here. Ah, good catch, will merge them. > >> + avio_write(pb, reformatted_data, size); >> } >> } >> } else if (par->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 && >> @@ -5740,6 +5865,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) >> trk->cluster[trk->entry].entries = samples_in_chunk; >> trk->cluster[trk->entry].dts = pkt->dts; >> trk->cluster[trk->entry].pts = pkt->pts; >> + trk->cluster[trk->entry].roll_distance = 0; >> if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) { >> if (!trk->frag_discont) { >> /* First packet of a new fragment. We already wrote the duration >> @@ -5821,6 +5947,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) >> >> if (par->codec_id == AV_CODEC_ID_VC1) { >> mov_parse_vc1_frame(pkt, trk); >> + } else if (par->codec_id == AV_CODEC_ID_H264 && !TAG_IS_AVCI(trk->tag)) { >> + mov_parse_h264_frame(pkt, trk, reformatted_data, size); >> } else if (par->codec_id == AV_CODEC_ID_TRUEHD) { >> mov_parse_truehd_frame(pkt, trk); >> } else if (pkt->flags & AV_PKT_FLAG_KEY) { >> diff --git a/libavformat/movenc.h b/libavformat/movenc.h >> index af1ea0bce6..73bf73ce8f 100644 >> --- a/libavformat/movenc.h >> +++ b/libavformat/movenc.h >> @@ -56,6 +56,7 @@ typedef struct MOVIentry { >> #define MOV_PARTIAL_SYNC_SAMPLE 0x0002 >> #define MOV_DISPOSABLE_SAMPLE 0x0004 >> uint32_t flags; >> + int16_t roll_distance; >> AVProducerReferenceTime prft; >> } MOVIentry; >> >> diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4 >> index a9c3823c2c..c08ee4c7ae 100644 >> --- a/tests/ref/lavf-fate/h264.mp4 >> +++ b/tests/ref/lavf-fate/h264.mp4 >> @@ -1,3 +1,3 @@ >> -fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4 >> -547928 tests/data/lavf-fate/lavf.h264.mp4 >> -tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999 >> +2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4 >> +548084 tests/data/lavf-fate/lavf.h264.mp4 >> +tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829 >> > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
diff --git a/libavformat/movenc.c b/libavformat/movenc.c index fedc9c0e18..01ab2f43a6 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -34,13 +34,17 @@ #include "avc.h" #include "libavcodec/ac3_parser_internal.h" #include "libavcodec/dnxhddata.h" +#include "libavcodec/h264.h" +#include "libavcodec/h2645_parse.h" #include "libavcodec/flac.h" #include "libavcodec/get_bits.h" +#include "libavcodec/golomb.h" #include "libavcodec/internal.h" #include "libavcodec/put_bits.h" #include "libavcodec/vc1_common.h" #include "libavcodec/raw.h" +#include "libavcodec/sei.h" #include "internal.h" #include "libavutil/avstring.h" #include "libavutil/channel_layout.h" @@ -2537,7 +2541,9 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) if (!sgpd_entries) return AVERROR(ENOMEM); - av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC); + av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || + track->par->codec_id == AV_CODEC_ID_AAC || + track->par->codec_id == AV_CODEC_ID_H264); if (track->par->codec_id == AV_CODEC_ID_OPUS) { for (i = 0; i < track->entry; i++) { @@ -2566,6 +2572,18 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track) sgpd_entries[entries].group_description_index = distance ? ++group : 0; } } + } else if (track->par->codec_id == AV_CODEC_ID_H264) { + for (i = 0; i < track->entry; i++) { + int distance = track->cluster[i].roll_distance; + if (i && distance == sgpd_entries[entries].roll_distance) { + sgpd_entries[entries].count++; + } else { + entries++; + sgpd_entries[entries].count = 1; + sgpd_entries[entries].roll_distance = distance; + sgpd_entries[entries].group_description_index = distance ? ++group : 0; + } + } } else { entries++; sgpd_entries[entries].count = track->sample_count; @@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext if (track->cenc.aes_ctr) { ff_mov_cenc_write_stbl_atoms(&track->cenc, pb); } - if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC) { + if (track->par->codec_id == AV_CODEC_ID_OPUS || + track->par->codec_id == AV_CODEC_ID_AAC || + track->par->codec_id == AV_CODEC_ID_H264) { mov_preroll_write_stbl_atoms(pb, track); } return update_size(pb, pos); @@ -5150,6 +5170,107 @@ static int mov_parse_mpeg2_frame(AVPacket *pkt, uint32_t *flags) return 0; } +static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk, uint8_t *reformatted_data, + int size) +{ + const uint8_t *buf, *buf_end, *sei_buf = NULL; + uint32_t state = -1; + unsigned roll_distance = 0; + int nal_length_size = 0, nalsize; + int ret, idr = 0; + + if (!size) + return 0; + if (trk->vos_data && trk->vos_data[0] == 1) { + if (trk->vos_len < 5) + return 0; + nal_length_size = (trk->vos_data[4] & 0x03) + 1; + buf = pkt->data; + buf_end = pkt->data + size; + } else { + nal_length_size = 4; + buf = reformatted_data; + buf_end = reformatted_data + size; + } + + while (buf_end - buf >= 4) { + int i = 0; + nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &i, NULL); + if (nalsize < 0) + break; + state = buf[nal_length_size]; + buf += nal_length_size + 1; + nalsize--; + + switch (state & 0x1f) { + case H264_NAL_IDR_SLICE: + idr = 1; + // fall-through + case H264_NAL_SLICE: + goto end; + case H264_NAL_SEI: { + int sei_size; + + sei_buf = ff_nal_unit_extract_rbsp(buf, nalsize, &sei_size, 0); + if (!sei_buf) + goto end; + + i = 0; + do { + GetBitContext gb; + int payload_type = 0, payload_size = 0; + + do { + if (sei_size - i < 1) + goto end; + payload_type += sei_buf[i]; + } while (sei_buf[i++] == 255); + do { + if (sei_size - i < 1) + goto end; + payload_size += sei_buf[i]; + } while (sei_buf[i++] == 255); + + if (payload_size > sei_size - i) + goto end; + + ret = init_get_bits8(&gb, sei_buf + i, payload_size); + if (ret < 0) + goto end; + + switch (payload_type) { + case SEI_TYPE_RECOVERY_POINT: + roll_distance = get_ue_golomb_long(&gb); + break; + default: + break; + } + i += payload_size; + } while (sei_size - i > 0 && sei_buf[i] != 0x80); + av_freep(&sei_buf); + break; + } + default: + break; + } + buf += nalsize; + } + +end: + if (roll_distance != (int16_t)roll_distance) + roll_distance = 0; + trk->cluster[trk->entry].roll_distance = roll_distance; + + if (idr) { + trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE; + trk->has_keyframes++; + } + + av_freep(&sei_buf); + + return 0; +} + static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk) { const uint8_t *start, *next, *end = pkt->data + pkt->size; @@ -5638,13 +5759,17 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) avio_write(pb, reformatted_data, size); } else { if (trk->cenc.aes_ctr) { - size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size); - if (size < 0) { - ret = size; + ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size); + if (ret < 0) + return ret; + ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, 4, pb, reformatted_data, size); + if (ret < 0) goto err; - } } else { - size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size); + ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size); + if (ret < 0) + return ret; + avio_write(pb, reformatted_data, size); } } } else if (par->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 && @@ -5740,6 +5865,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) trk->cluster[trk->entry].entries = samples_in_chunk; trk->cluster[trk->entry].dts = pkt->dts; trk->cluster[trk->entry].pts = pkt->pts; + trk->cluster[trk->entry].roll_distance = 0; if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) { if (!trk->frag_discont) { /* First packet of a new fragment. We already wrote the duration @@ -5821,6 +5947,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) if (par->codec_id == AV_CODEC_ID_VC1) { mov_parse_vc1_frame(pkt, trk); + } else if (par->codec_id == AV_CODEC_ID_H264 && !TAG_IS_AVCI(trk->tag)) { + mov_parse_h264_frame(pkt, trk, reformatted_data, size); } else if (par->codec_id == AV_CODEC_ID_TRUEHD) { mov_parse_truehd_frame(pkt, trk); } else if (pkt->flags & AV_PKT_FLAG_KEY) { diff --git a/libavformat/movenc.h b/libavformat/movenc.h index af1ea0bce6..73bf73ce8f 100644 --- a/libavformat/movenc.h +++ b/libavformat/movenc.h @@ -56,6 +56,7 @@ typedef struct MOVIentry { #define MOV_PARTIAL_SYNC_SAMPLE 0x0002 #define MOV_DISPOSABLE_SAMPLE 0x0004 uint32_t flags; + int16_t roll_distance; AVProducerReferenceTime prft; } MOVIentry; diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4 index a9c3823c2c..c08ee4c7ae 100644 --- a/tests/ref/lavf-fate/h264.mp4 +++ b/tests/ref/lavf-fate/h264.mp4 @@ -1,3 +1,3 @@ -fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4 -547928 tests/data/lavf-fate/lavf.h264.mp4 -tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999 +2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4 +548084 tests/data/lavf-fate/lavf.h264.mp4 +tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829
Since we can't blindly trust the keyframe flag in packets and assume its contents are a valid Sync Sample, do some basic bitstream parsing to build the Sync Sample table in addition to a Random Access Recovery Point table. Suggested-by: ffmpeg@fb.com Signed-off-by: James Almer <jamrial@gmail.com> --- libavformat/movenc.c | 142 +++++++++++++++++++++++++++++++++-- libavformat/movenc.h | 1 + tests/ref/lavf-fate/h264.mp4 | 6 +- 3 files changed, 139 insertions(+), 10 deletions(-)