[FFmpeg-devel] avcodec: add a bsf to reorder DTS into PTS

Message ID	20220905010955.173-1-jamrial@gmail.com
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: James Almer <jamrial@gmail.com> To: ffmpeg-devel@ffmpeg.org Date: Sun, 4 Sep 2022 22:09:55 -0300 Message-Id: <20220905010955.173-1-jamrial@gmail.com> In-Reply-To: <20220830014653.2477-1-jamrial@gmail.com> References: <20220830014653.2477-1-jamrial@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH] avcodec: add a bsf to reorder DTS into PTS Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel] avcodec: add a bsf to reorder DTS into PTS \| expand [FFmpeg-devel] avcodec: add a bsf to reorder DTS into PTS

Context	Check	Description
yinshiyou/make_loongarch64	success	Make finished
yinshiyou/make_fate_loongarch64	success	Make fate finished
andriy/make_x86	success	Make finished
andriy/make_fate_x86	success	Make fate finished

On 9/23/2022 10:06 AM, Anton Khirnov wrote: > Quoting James Almer (2022-09-05 03:09:55) >> +static int h264_init(AVBSFContext *ctx) >> +{ >> + DTS2PTSContext *s = ctx->priv_data; >> + DTS2PTSH264Context *h264 = &s->u.h264; >> + >> + s->cbc->decompose_unit_types = h264_decompose_unit_types; >> + s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(h264_decompose_unit_types); >> + >> + s->nb_frame = -(ctx->par_in->video_delay << 1); >> + h264->last_poc = h264->highest_poc = INT_MIN; > > just call h264_flush()? Ok. I avoided doing that since the memsets in there would be run for no gain. But this is init() after all, so called only once. > >> + return 0; >> +} >> + >> +static int get_mmco_reset(const H264RawSliceHeader *header) >> +{ >> + if (header->nal_unit_header.nal_ref_idc == 0 || >> + !header->adaptive_ref_pic_marking_mode_flag) >> + return 0; >> + >> + for (int i = 0; i < H264_MAX_MMCO_COUNT; i++) { >> + if (header->mmco[i].memory_management_control_operation == 0) >> + return 0; >> + else if (header->mmco[i].memory_management_control_operation == 5) >> + return 1; >> + } >> + >> + return 0; >> +} >> + >> +static int h264_queue_frame(AVBSFContext *ctx, AVPacket *pkt, int poc, int *queued) >> +{ >> + DTS2PTSContext *s = ctx->priv_data; >> + DTS2PTSH264Context *h264 = &s->u.h264; >> + DTS2PTSFrame frame; >> + int poc_diff, ret; >> + >> + poc_diff = (h264->picture_structure == 3) + 1; >> + if (h264->sps.frame_mbs_only_flag && h264->poc_diff) >> + poc_diff = FFMIN(poc_diff, h264->poc_diff); >> + if (poc < 0) { >> + av_tree_enumerate(s->root, &poc_diff, NULL, dec_poc); >> + s->nb_frame -= poc_diff; >> + } >> + // Check if there was a POC reset (Like an IDR slice) > > I don't think this is enough. You should bump the sequence counter on > - IDR > - MMCO type 5 > - SPS change > - H264_NAL_END_SEQUENCE/STREAM This is handled when parsing the packet. 
ff_h264_init_poc() will return the actual POC value for the frame as coded in the bitstream, which should have been reset in all those cases. > Then every sequence should get a separate tree and you sort POCs in each > tree. When POC is reset, for example on an IDR, the last few frames of the previous GOP are meant to have as PTS the DTS from the first few frames in the new GOP. Is using separate trees really simplifying things if I still need to cross tree boundaries to fetch timestamps? > >> + if (s->nb_frame > h264->highest_poc) { >> + s->nb_frame = 0; >> + s->gop = (s->gop + 1) % s->fifo_size; >> + h264->highest_poc = h264->last_poc; >> + } >> + >> + ret = alloc_and_insert_node(ctx, pkt->dts, pkt->duration, s->nb_frame, poc_diff, s->gop); >> + if (ret < 0) >> + return ret; >> + av_log(ctx, AV_LOG_DEBUG, "Queueing frame with POC %d, GOP %d, dts %"PRId64"\n", >> + poc, s->gop, pkt->dts); >> + s->nb_frame += poc_diff; >> + >> + // Add frame to output FIFO only once >> + if (*queued) >> + return 0; >> + >> + frame = (DTS2PTSFrame) { pkt, poc, poc_diff, s->gop }; >> + ret = av_fifo_write(s->fifo, &frame, 1); >> + av_assert2(ret >= 0); >> + *queued = 1; >> + >> + return 0; >> +} >> + >> +static int h264_filter(AVBSFContext *ctx) >> +{ >> + DTS2PTSContext *s = ctx->priv_data; >> + DTS2PTSH264Context *h264 = &s->u.h264; >> + CodedBitstreamFragment *au = &s->au; >> + AVPacket *in; >> + int output_picture_number = INT_MIN; >> + int field_poc[2]; >> + int queued = 0, ret; >> + >> + ret = ff_bsf_get_packet(ctx, &in); >> + if (ret < 0) >> + return ret; >> + >> + ret = ff_cbs_read_packet(s->cbc, au, in); >> + if (ret < 0) { >> + av_log(ctx, AV_LOG_WARNING, "Failed to parse access unit.\n"); >> + goto fail; >> + } >> + >> + for (int i = 0; i < au->nb_units; i++) { >> + CodedBitstreamUnit *unit = &au->units[i]; >> + >> + switch (unit->type) { >> + case H264_NAL_IDR_SLICE: >> + h264->poc.prev_frame_num = 0; >> + h264->poc.prev_frame_num_offset = 0; >> + h264->poc.prev_poc_msb = 
>> + h264->poc.prev_poc_lsb = 0; >> + // fall-through >> + case H264_NAL_SLICE: { >> + const H264RawSlice *slice = unit->content; >> + const H264RawSliceHeader *header = &slice->header; >> + const CodedBitstreamH264Context *cbs_h264 = s->cbc->priv_data; >> + const H264RawSPS *sps = cbs_h264->active_sps; >> + int got_reset; >> + >> + if (!sps) { >> + av_log(ctx, AV_LOG_ERROR, "No active SPS for a slice\n"); >> + goto fail; >> + } >> + // Initialize the SPS struct with the fields ff_h264_init_poc() cares about >> + h264->sps.frame_mbs_only_flag = sps->frame_mbs_only_flag; >> + h264->sps.log2_max_frame_num = sps->log2_max_frame_num_minus4 + 4; >> + h264->sps.poc_type = sps->pic_order_cnt_type; >> + h264->sps.log2_max_poc_lsb = sps->log2_max_pic_order_cnt_lsb_minus4 + 4; >> + h264->sps.offset_for_non_ref_pic = sps->offset_for_non_ref_pic; >> + h264->sps.offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field; >> + h264->sps.poc_cycle_length = sps->num_ref_frames_in_pic_order_cnt_cycle; >> + for (int i = 0; i < h264->sps.poc_cycle_length; i++) > > moderately evil shadowing Yikes, will change. > >> + h264->sps.offset_for_ref_frame[i] = sps->offset_for_ref_frame[i]; >> + >> + h264->picture_structure = sps->frame_mbs_only_flag ? 3 : >> + (header->field_pic_flag ? 
>> + header->field_pic_flag + header->bottom_field_flag : 3); >> + >> + h264->poc.frame_num = header->frame_num; >> + h264->poc.poc_lsb = header->pic_order_cnt_lsb; >> + h264->poc.delta_poc_bottom = header->delta_pic_order_cnt_bottom; >> + h264->poc.delta_poc[0] = header->delta_pic_order_cnt[0]; >> + h264->poc.delta_poc[1] = header->delta_pic_order_cnt[1]; >> + >> + field_poc[0] = field_poc[1] = INT_MAX; >> + ret = ff_h264_init_poc(field_poc, &output_picture_number, &h264->sps, >> + &h264->poc, h264->picture_structure, >> + header->nal_unit_header.nal_ref_idc); >> + if (ret < 0) { >> + av_log(ctx, AV_LOG_ERROR, "ff_h264_init_poc() failure\n"); >> + goto fail; >> + } >> + >> + got_reset = get_mmco_reset(header); >> + h264->poc.prev_frame_num = got_reset ? 0 : h264->poc.frame_num; >> + h264->poc.prev_frame_num_offset = got_reset ? 0 : h264->poc.frame_num_offset; >> + if (header->nal_unit_header.nal_ref_idc != 0) { >> + h264->poc.prev_poc_msb = got_reset ? 0 : h264->poc.poc_msb; >> + if (got_reset) >> + h264->poc.prev_poc_lsb = h264->picture_structure == 2 ? 0 : field_poc[0]; >> + else >> + h264->poc.prev_poc_lsb = h264->poc.poc_lsb; >> + } >> + >> + if (output_picture_number != h264->last_poc) { > > Why this condition? A single frame/field could be split into several slice NALs, as signaled by first_mb_in_slice. The POC value (and pretty much everything else in the slice NAL header) is duplicated in all of them. This is to add the frame/slice only once to the tree and fifo. > > Also the entire block below looks extremely magical > and could use some detailed explanation. So each frame can be coded either as a complete frame or as field pairs. POC in most cases increases by 1 per field, even in coded frames. 
But in some rare cases it doesn't, like this one sample in FATE where frame_mbs_only_flag is 1 (meaning the entire sequence is coded frames and it's not signaled on a per-slice basis) and POC increases by 1 per coded frame instead of 2 like in the vast majority of samples. This code below is a heuristic to find this difference between POC values and build the tree accordingly, because there doesn't seem to be anything in the bitstream to signal this at all. > >> + if (h264->last_poc != INT_MIN) { >> + int diff = FFABS(h264->last_poc - output_picture_number); >> + >> + if ((output_picture_number < 0) && !h264->last_poc) >> + h264->poc_diff = 0; >> + else if (FFABS(output_picture_number) < h264->poc_diff) { >> + diff = FFABS(output_picture_number); >> + h264->poc_diff = 0; >> + } >> + if (!h264->poc_diff || (h264->poc_diff > diff)) { >> + h264->poc_diff = diff; >> + if (h264->poc_diff == 1 && h264->sps.frame_mbs_only_flag) { >> + av_tree_enumerate(s->root, &h264->poc_diff, NULL, dec_poc); >> + s->nb_frame -= 2; >> + } >> + } >> + } >> + h264->last_poc = output_picture_number; >> + h264->highest_poc = FFMAX(h264->highest_poc, output_picture_number); >> + >> + ret = h264_queue_frame(ctx, in, output_picture_number, &queued); >> + if (ret < 0) >> + goto fail; >> + } >> + break; >> + } >> + default: >> + break; >> + } >> + } >> + >> + if (output_picture_number == INT_MIN) { >> + av_log(ctx, AV_LOG_ERROR, "No slices in access unit\n"); >> + ret = AVERROR_INVALIDDATA; >> + goto fail; >> + } >> + >> + ret = AVERROR(EAGAIN); >> +fail: >> + ff_cbs_fragment_reset(au); >> + if (!queued) >> + av_packet_free(&in); >> + >> + return ret; >> +} >> [...] 
>> +static int dts2pts_filter(AVBSFContext *ctx, AVPacket *out) >> +{ >> + DTS2PTSContext *s = ctx->priv_data; >> + DTS2PTSNode *poc_node = NULL, *next[2] = { NULL, NULL }; >> + DTS2PTSFrame frame; >> + int ret; >> + >> + // Fill up the FIFO and POC tree >> + if (!s->eof && av_fifo_can_write(s->fifo)) { > > More than one packet can be available, so this should probably be a > loop. What do you mean? AVBSFContext can contain at most one buffered packet. And in here I'm filling the FIFO before I start draining and replacing one packet in it at a time.

diff --git a/configure b/configure index 932ea5b553..91ee5eb303 100755 --- a/configure +++ b/configure @@ -3275,6 +3275,7 @@ aac_adtstoasc_bsf_select="adts_header mpeg4audio" av1_frame_merge_bsf_select="cbs_av1" av1_frame_split_bsf_select="cbs_av1" av1_metadata_bsf_select="cbs_av1" +dts2pts_bsf_select="cbs_h264 h264parse" eac3_core_bsf_select="ac3_parser" filter_units_bsf_select="cbs" h264_metadata_bsf_deps="const_nan" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index cb80f73d99..858e110b79 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1176,6 +1176,7 @@ OBJS-$(CONFIG_AV1_FRAME_SPLIT_BSF) += av1_frame_split_bsf.o OBJS-$(CONFIG_CHOMP_BSF) += chomp_bsf.o OBJS-$(CONFIG_DUMP_EXTRADATA_BSF) += dump_extradata_bsf.o OBJS-$(CONFIG_DCA_CORE_BSF) += dca_core_bsf.o +OBJS-$(CONFIG_DTS2PTS_BSF) += dts2pts_bsf.o OBJS-$(CONFIG_DV_ERROR_MARKER_BSF) += dv_error_marker_bsf.o OBJS-$(CONFIG_EAC3_CORE_BSF) += eac3_core_bsf.o OBJS-$(CONFIG_EXTRACT_EXTRADATA_BSF) += extract_extradata_bsf.o \ diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c index 444423ae93..a3bebefe5f 100644 --- a/libavcodec/bitstream_filters.c +++ b/libavcodec/bitstream_filters.c @@ -31,6 +31,7 @@ extern const FFBitStreamFilter ff_av1_metadata_bsf; extern const FFBitStreamFilter ff_chomp_bsf; extern const FFBitStreamFilter ff_dump_extradata_bsf; extern const FFBitStreamFilter ff_dca_core_bsf; +extern const FFBitStreamFilter ff_dts2pts_bsf; extern const FFBitStreamFilter ff_dv_error_marker_bsf; extern const FFBitStreamFilter ff_eac3_core_bsf; extern const FFBitStreamFilter ff_extract_extradata_bsf; diff --git a/libavcodec/dts2pts_bsf.c b/libavcodec/dts2pts_bsf.c new file mode 100644 index 0000000000..aaa1d1c370 --- /dev/null +++ b/libavcodec/dts2pts_bsf.c @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2022 James Almer + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Derive PTS by reordering DTS from supported streams + */ + +#include "libavutil/avassert.h" +#include "libavutil/fifo.h" +#include "libavutil/opt.h" +#include "libavutil/tree.h" + +#include "bsf.h" +#include "bsf_internal.h" +#include "cbs.h" +#include "cbs_h264.h" +#include "h264_parse.h" +#include "h264_ps.h" + +typedef struct DTS2PTSNode { + int64_t dts; + int64_t duration; + int poc; + int gop; +} DTS2PTSNode; + +typedef struct DTS2PTSFrame { + AVPacket *pkt; + int poc; + int poc_diff; + int gop; +} DTS2PTSFrame; + +typedef struct DTS2PTSH264Context { + H264POCContext poc; + SPS sps; + int poc_diff; + int last_poc; + int highest_poc; + int picture_structure; +} DTS2PTSH264Context; + +typedef struct DTS2PTSContext { + struct AVTreeNode *root; + AVFifo *fifo; + + // Codec specific function pointers and constants + int (*init)(AVBSFContext *ctx); + int (*filter)(AVBSFContext *ctx); + void (*flush)(AVBSFContext *ctx); + size_t fifo_size; + + CodedBitstreamContext *cbc; + CodedBitstreamFragment au; + + union { + DTS2PTSH264Context h264; + } u; + + int nb_frame; + int gop; + int eof; +} DTS2PTSContext; + +// AVTreeNode callbacks +static int cmp_insert(const void *key, const void *node) +{ + int ret = ((const 
DTS2PTSNode *)key)->poc - ((const DTS2PTSNode *)node)->poc; + if (!ret) + ret = ((const DTS2PTSNode *)key)->gop - ((const DTS2PTSNode *)node)->gop; + return ret; +} + +static int cmp_find(const void *key, const void *node) +{ + int ret = ((const DTS2PTSFrame *)key)->poc - ((const DTS2PTSNode *) node)->poc; + if (!ret) + ret = ((const DTS2PTSFrame *)key)->gop - ((const DTS2PTSNode *) node)->gop; + return ret; +} + +static int dec_poc(void *opaque, void *elem) +{ + DTS2PTSNode *node = elem; + int dec = *(int *)opaque; + node->poc -= dec; + return 0; +} + +static int free_node(void *opaque, void *elem) +{ + DTS2PTSNode *node = elem; + av_free(node); + return 0; +} + +// Shared functions +static int alloc_and_insert_node(AVBSFContext *ctx, int64_t ts, int64_t duration, + int poc, int poc_diff, int gop) +{ + DTS2PTSContext *s = ctx->priv_data; + for (int i = 0; i < poc_diff; i++) { + struct AVTreeNode *node = av_tree_node_alloc(); + DTS2PTSNode *poc_node, *ret; + if (!node) + return AVERROR(ENOMEM); + poc_node = av_malloc(sizeof(*poc_node)); + if (!poc_node) { + av_free(node); + return AVERROR(ENOMEM); + } + if (i && ts != AV_NOPTS_VALUE) + ts += duration / poc_diff; + *poc_node = (DTS2PTSNode) { ts, duration, poc++, gop }; + ret = av_tree_insert(&s->root, poc_node, cmp_insert, &node); + if (ret && ret != poc_node) { + *ret = *poc_node; + av_free(poc_node); + av_free(node); + } + } + return 0; +} + +// H.264 +static const CodedBitstreamUnitType h264_decompose_unit_types[] = { + H264_NAL_SPS, + H264_NAL_PPS, + H264_NAL_IDR_SLICE, + H264_NAL_SLICE, +}; + +static int h264_init(AVBSFContext *ctx) +{ + DTS2PTSContext *s = ctx->priv_data; + DTS2PTSH264Context *h264 = &s->u.h264; + + s->cbc->decompose_unit_types = h264_decompose_unit_types; + s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(h264_decompose_unit_types); + + s->nb_frame = -(ctx->par_in->video_delay << 1); + h264->last_poc = h264->highest_poc = INT_MIN; + + return 0; +} + +static int get_mmco_reset(const 
H264RawSliceHeader *header) +{ + if (header->nal_unit_header.nal_ref_idc == 0 || + !header->adaptive_ref_pic_marking_mode_flag) + return 0; + + for (int i = 0; i < H264_MAX_MMCO_COUNT; i++) { + if (header->mmco[i].memory_management_control_operation == 0) + return 0; + else if (header->mmco[i].memory_management_control_operation == 5) + return 1; + } + + return 0; +} + +static int h264_queue_frame(AVBSFContext *ctx, AVPacket *pkt, int poc, int *queued) +{ + DTS2PTSContext *s = ctx->priv_data; + DTS2PTSH264Context *h264 = &s->u.h264; + DTS2PTSFrame frame; + int poc_diff, ret; + + poc_diff = (h264->picture_structure == 3) + 1; + if (h264->sps.frame_mbs_only_flag && h264->poc_diff) + poc_diff = FFMIN(poc_diff, h264->poc_diff); + if (poc < 0) { + av_tree_enumerate(s->root, &poc_diff, NULL, dec_poc); + s->nb_frame -= poc_diff; + } + // Check if there was a POC reset (Like an IDR slice) + if (s->nb_frame > h264->highest_poc) { + s->nb_frame = 0; + s->gop = (s->gop + 1) % s->fifo_size; + h264->highest_poc = h264->last_poc; + } + + ret = alloc_and_insert_node(ctx, pkt->dts, pkt->duration, s->nb_frame, poc_diff, s->gop); + if (ret < 0) + return ret; + av_log(ctx, AV_LOG_DEBUG, "Queueing frame with POC %d, GOP %d, dts %"PRId64"\n", + poc, s->gop, pkt->dts); + s->nb_frame += poc_diff; + + // Add frame to output FIFO only once + if (*queued) + return 0; + + frame = (DTS2PTSFrame) { pkt, poc, poc_diff, s->gop }; + ret = av_fifo_write(s->fifo, &frame, 1); + av_assert2(ret >= 0); + *queued = 1; + + return 0; +} + +static int h264_filter(AVBSFContext *ctx) +{ + DTS2PTSContext *s = ctx->priv_data; + DTS2PTSH264Context *h264 = &s->u.h264; + CodedBitstreamFragment *au = &s->au; + AVPacket *in; + int output_picture_number = INT_MIN; + int field_poc[2]; + int queued = 0, ret; + + ret = ff_bsf_get_packet(ctx, &in); + if (ret < 0) + return ret; + + ret = ff_cbs_read_packet(s->cbc, au, in); + if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "Failed to parse access unit.\n"); + goto fail; + } 
+ + for (int i = 0; i < au->nb_units; i++) { + CodedBitstreamUnit *unit = &au->units[i]; + + switch (unit->type) { + case H264_NAL_IDR_SLICE: + h264->poc.prev_frame_num = 0; + h264->poc.prev_frame_num_offset = 0; + h264->poc.prev_poc_msb = + h264->poc.prev_poc_lsb = 0; + // fall-through + case H264_NAL_SLICE: { + const H264RawSlice *slice = unit->content; + const H264RawSliceHeader *header = &slice->header; + const CodedBitstreamH264Context *cbs_h264 = s->cbc->priv_data; + const H264RawSPS *sps = cbs_h264->active_sps; + int got_reset; + + if (!sps) { + av_log(ctx, AV_LOG_ERROR, "No active SPS for a slice\n"); + goto fail; + } + // Initialize the SPS struct with the fields ff_h264_init_poc() cares about + h264->sps.frame_mbs_only_flag = sps->frame_mbs_only_flag; + h264->sps.log2_max_frame_num = sps->log2_max_frame_num_minus4 + 4; + h264->sps.poc_type = sps->pic_order_cnt_type; + h264->sps.log2_max_poc_lsb = sps->log2_max_pic_order_cnt_lsb_minus4 + 4; + h264->sps.offset_for_non_ref_pic = sps->offset_for_non_ref_pic; + h264->sps.offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field; + h264->sps.poc_cycle_length = sps->num_ref_frames_in_pic_order_cnt_cycle; + for (int i = 0; i < h264->sps.poc_cycle_length; i++) + h264->sps.offset_for_ref_frame[i] = sps->offset_for_ref_frame[i]; + + h264->picture_structure = sps->frame_mbs_only_flag ? 3 : + (header->field_pic_flag ? 
+ header->field_pic_flag + header->bottom_field_flag : 3); + + h264->poc.frame_num = header->frame_num; + h264->poc.poc_lsb = header->pic_order_cnt_lsb; + h264->poc.delta_poc_bottom = header->delta_pic_order_cnt_bottom; + h264->poc.delta_poc[0] = header->delta_pic_order_cnt[0]; + h264->poc.delta_poc[1] = header->delta_pic_order_cnt[1]; + + field_poc[0] = field_poc[1] = INT_MAX; + ret = ff_h264_init_poc(field_poc, &output_picture_number, &h264->sps, + &h264->poc, h264->picture_structure, + header->nal_unit_header.nal_ref_idc); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "ff_h264_init_poc() failure\n"); + goto fail; + } + + got_reset = get_mmco_reset(header); + h264->poc.prev_frame_num = got_reset ? 0 : h264->poc.frame_num; + h264->poc.prev_frame_num_offset = got_reset ? 0 : h264->poc.frame_num_offset; + if (header->nal_unit_header.nal_ref_idc != 0) { + h264->poc.prev_poc_msb = got_reset ? 0 : h264->poc.poc_msb; + if (got_reset) + h264->poc.prev_poc_lsb = h264->picture_structure == 2 ? 0 : field_poc[0]; + else + h264->poc.prev_poc_lsb = h264->poc.poc_lsb; + } + + if (output_picture_number != h264->last_poc) { + if (h264->last_poc != INT_MIN) { + int diff = FFABS(h264->last_poc - output_picture_number); + + if ((output_picture_number < 0) && !h264->last_poc) + h264->poc_diff = 0; + else if (FFABS(output_picture_number) < h264->poc_diff) { + diff = FFABS(output_picture_number); + h264->poc_diff = 0; + } + if (!h264->poc_diff || (h264->poc_diff > diff)) { + h264->poc_diff = diff; + if (h264->poc_diff == 1 && h264->sps.frame_mbs_only_flag) { + av_tree_enumerate(s->root, &h264->poc_diff, NULL, dec_poc); + s->nb_frame -= 2; + } + } + } + h264->last_poc = output_picture_number; + h264->highest_poc = FFMAX(h264->highest_poc, output_picture_number); + + ret = h264_queue_frame(ctx, in, output_picture_number, &queued); + if (ret < 0) + goto fail; + } + break; + } + default: + break; + } + } + + if (output_picture_number == INT_MIN) { + av_log(ctx, AV_LOG_ERROR, "No slices in 
access unit\n"); + ret = AVERROR_INVALIDDATA; + goto fail; + } + + ret = AVERROR(EAGAIN); +fail: + ff_cbs_fragment_reset(au); + if (!queued) + av_packet_free(&in); + + return ret; +} + +static void h264_flush(AVBSFContext *ctx) +{ + DTS2PTSContext *s = ctx->priv_data; + DTS2PTSH264Context *h264 = &s->u.h264; + + memset(&h264->sps, 0, sizeof(h264->sps)); + memset(&h264->poc, 0, sizeof(h264->poc)); + s->nb_frame = -(ctx->par_in->video_delay << 1); + h264->last_poc = h264->highest_poc = INT_MIN; +} + +// Core functions +static const struct { + enum AVCodecID id; + int (*init)(AVBSFContext *ctx); + int (*filter)(AVBSFContext *ctx); + void (*flush)(AVBSFContext *ctx); + size_t fifo_size; +} func_tab[] = { + { AV_CODEC_ID_H264, h264_init, h264_filter, h264_flush, H264_MAX_DPB_FRAMES * 2 * 2 }, +}; + +static int dts2pts_init(AVBSFContext *ctx) +{ + DTS2PTSContext *s = ctx->priv_data; + CodedBitstreamFragment *au = &s->au; + int i, ret; + + for (i = 0; i < FF_ARRAY_ELEMS(func_tab); i++) { + if (func_tab[i].id == ctx->par_in->codec_id) { + s->init = func_tab[i].init; + s->filter = func_tab[i].filter; + s->flush = func_tab[i].flush; + s->fifo_size = func_tab[i].fifo_size; + break; + } + } + if (i == FF_ARRAY_ELEMS(func_tab)) + return AVERROR_BUG; + av_assert0(s->filter && s->fifo_size); + + s->fifo = av_fifo_alloc2(s->fifo_size, sizeof(DTS2PTSFrame), 0); + if (!s->fifo) + return AVERROR(ENOMEM); + + ret = ff_cbs_init(&s->cbc, ctx->par_in->codec_id, ctx); + if (ret < 0) + return ret; + + if (s->init) { + ret = s->init(ctx); + if (ret < 0) + return ret; + } + + if (!ctx->par_in->extradata_size) + return 0; + + ret = ff_cbs_read_extradata(s->cbc, au, ctx->par_in); + if (ret < 0) + av_log(ctx, AV_LOG_WARNING, "Failed to parse extradata.\n"); + + ff_cbs_fragment_reset(au); + + return 0; +} + +static int dts2pts_filter(AVBSFContext *ctx, AVPacket *out) +{ + DTS2PTSContext *s = ctx->priv_data; + DTS2PTSNode *poc_node = NULL, *next[2] = { NULL, NULL }; + DTS2PTSFrame frame; + int 
ret; + + // Fill up the FIFO and POC tree + if (!s->eof && av_fifo_can_write(s->fifo)) { + ret = s->filter(ctx); + if (ret != AVERROR_EOF) + return ret; + s->eof = 1; + } + + if (!av_fifo_can_read(s->fifo)) + return AVERROR_EOF; + + // Fetch a packet from the FIFO + ret = av_fifo_read(s->fifo, &frame, 1); + av_assert2(ret >= 0); + av_packet_move_ref(out, frame.pkt); + av_packet_free(&frame.pkt); + + // Search the timestamp for the requested POC and set PTS + poc_node = av_tree_find(s->root, &frame, cmp_find, (void **)next); + if (!poc_node) { + poc_node = next[1]; + if (!poc_node || poc_node->poc != frame.poc) + poc_node = next[0]; + } + if (poc_node && poc_node->poc == frame.poc) { + out->pts = poc_node->dts; + if (!s->eof) { + // Remove the found entry from the tree + DTS2PTSFrame dup = (DTS2PTSFrame) { NULL, frame.poc + 1, frame.poc_diff, frame.gop }; + for (; dup.poc_diff > 0; dup.poc++, dup.poc_diff--) { + struct AVTreeNode *node = NULL; + if (!poc_node || poc_node->dts != out->pts) + continue; + av_tree_insert(&s->root, poc_node, cmp_insert, &node); + av_free(poc_node); + av_free(node); + poc_node = av_tree_find(s->root, &dup, cmp_find, NULL); + } + } + } else { + DTS2PTSFrame dup = (DTS2PTSFrame) { NULL, frame.poc - 1, frame.poc_diff, frame.gop }; + if (s->eof && (poc_node = av_tree_find(s->root, &dup, cmp_find, NULL)) && poc_node->poc == dup.poc) { + out->pts = poc_node->dts; + if (out->pts != AV_NOPTS_VALUE) + out->pts += poc_node->duration; + ret = alloc_and_insert_node(ctx, out->pts, out->duration, + frame.poc, frame.poc_diff, frame.gop); + if (ret < 0) { + av_packet_unref(out); + return ret; + } + if (!ret) + av_log(ctx, AV_LOG_DEBUG, "Queueing frame for POC %d, GOP %d, dts %"PRId64", " + "generated from POC %d, GOP %d, dts %"PRId64", duration %"PRId64"\n", + frame.poc, frame.gop, out->pts, + poc_node->poc, poc_node->gop, poc_node->dts, poc_node->duration); + } else + av_log(ctx, AV_LOG_WARNING, "No timestamp for POC %d in tree\n", frame.poc); + } + 
av_log(ctx, AV_LOG_DEBUG, "Returning frame for POC %d, GOP %d, dts %"PRId64", pts %"PRId64"\n", + frame.poc, frame.gop, out->dts, out->pts); + + return 0; +} + +static void dts2pts_flush(AVBSFContext *ctx) +{ + DTS2PTSContext *s = ctx->priv_data; + DTS2PTSFrame frame; + + if (s->flush) + s->flush(ctx); + s->eof = 0; + s->gop = 0; + + while (s->fifo && av_fifo_read(s->fifo, &frame, 1) >= 0) + av_packet_free(&frame.pkt); + + av_tree_enumerate(s->root, NULL, NULL, free_node); + av_tree_destroy(s->root); + s->root = NULL; + + ff_cbs_fragment_reset(&s->au); + ff_cbs_flush(s->cbc); +} + +static void dts2pts_close(AVBSFContext *ctx) +{ + DTS2PTSContext *s = ctx->priv_data; + + dts2pts_flush(ctx); + + av_fifo_freep2(&s->fifo); + ff_cbs_fragment_free(&s->au); + ff_cbs_close(&s->cbc); +} + +static const enum AVCodecID dts2pts_codec_ids[] = { + AV_CODEC_ID_H264, + AV_CODEC_ID_NONE, +}; + +const FFBitStreamFilter ff_dts2pts_bsf = { + .p.name = "dts2pts", + .p.codec_ids = dts2pts_codec_ids, + .priv_data_size = sizeof(DTS2PTSContext), + .init = dts2pts_init, + .flush = dts2pts_flush, + .close = dts2pts_close, + .filter = dts2pts_filter, +};

[FFmpeg-devel] avcodec: add a bsf to reorder DTS into PTS

Checks

Commit Message

Comments

Patch