[FFmpeg-devel] lavf/movenc: Replace dts by pts to calculate duration

Submitted by manuelyuan on Nov. 7, 2019, 9:48 a.m.

Details

Message ID 20191107094801.96844-1-manuelyuan@163.com
State New
Headers show

Commit Message

manuelyuan Nov. 7, 2019, 9:48 a.m.
From: Mengyang Yuan <manuelyuan@163.com>

In this case, the input video is of dynamic frame rate and we don't want to
duplicate or drop frames, but the output video duration calculated by DTS is
incorrect, I solved it by using PTS.
There are many UGC videos with dynamic frame rates, which are represented by
PTS jumps. After transcoding with ffmpeg -vsync 0 or -vsync 2, the output
video duration becomes longer.By reading the code of x264/encoder/encoder.c,
I found that in order to predict the B frame, x264 needs to ensure that there
are enough reference frames when DTS = 0, so the DTS of these reference frames
will subtract the cache time. However, the cache time includes the part of PTS
jumps, which results in the abnormal small DTS.

Signed-off-by: Mengyang Yuan <manuelyuan@163.com>
---
 libavformat/movenc.c  | 23 ++++++++++++++---------
 libavformat/movenc.h  |  2 ++
 tests/ref/fate/movenc | 20 ++++++++++----------
 tests/ref/lavf/mov    |  4 ++--
 tests/ref/lavf/mp4    |  4 ++--
 5 files changed, 30 insertions(+), 23 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 715bec1c2f..206aa48d8c 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1015,7 +1015,7 @@  static int get_cluster_duration(MOVTrack *track, int cluster_idx)
         return 0;
 
     if (cluster_idx + 1 == track->entry)
-        next_dts = track->track_duration + track->start_dts;
+        next_dts = track->end_dts;
     else
         next_dts = track->cluster[cluster_idx + 1].dts;
 
@@ -5149,8 +5149,7 @@  static int mov_flush_fragment(AVFormatContext *s, int force)
         mov->mdat_size = 0;
         for (i = 0; i < mov->nb_streams; i++) {
             if (mov->tracks[i].entry)
-                mov->tracks[i].frag_start += mov->tracks[i].start_dts +
-                                             mov->tracks[i].track_duration -
+                mov->tracks[i].frag_start += mov->tracks[i].end_dts -
                                              mov->tracks[i].cluster[0].dts;
             mov->tracks[i].entry = 0;
             mov->tracks[i].end_reliable = 0;
@@ -5208,7 +5207,7 @@  static int mov_flush_fragment(AVFormatContext *s, int force)
         int64_t duration = 0;
 
         if (track->entry)
-            duration = track->start_dts + track->track_duration -
+            duration = track->end_dts -
                        track->cluster[0].dts;
         if (mov->flags & FF_MOV_FLAG_SEPARATE_MOOF) {
             if (!track->mdat_buf)
@@ -5281,7 +5280,7 @@  static int check_pkt(AVFormatContext *s, AVPacket *pkt)
         ref = trk->cluster[trk->entry - 1].dts;
     } else if (   trk->start_dts != AV_NOPTS_VALUE
                && !trk->frag_discont) {
-        ref = trk->start_dts + trk->track_duration;
+        ref = trk->end_dts;
     } else
         ref = pkt->dts; // Skip tests for the first packet
 
@@ -5494,7 +5493,7 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
              * of the last packet of the previous fragment based on track_duration,
              * which might not exactly match our dts. Therefore adjust the dts
              * of this packet to be what the previous packets duration implies. */
-            trk->cluster[trk->entry].dts = trk->start_dts + trk->track_duration;
+            trk->cluster[trk->entry].dts = trk->end_dts;
             /* We also may have written the pts and the corresponding duration
              * in sidx/tfrf/tfxd tags; make sure the sidx pts and duration match up with
              * the next fragment. This means the cts of the first sample must
@@ -5546,13 +5545,17 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
                    "this case.\n",
                    pkt->stream_index, pkt->dts);
     }
-    trk->track_duration = pkt->dts - trk->start_dts + pkt->duration;
-    trk->last_sample_is_subtitle_end = 0;
-
     if (pkt->pts == AV_NOPTS_VALUE) {
         av_log(s, AV_LOG_WARNING, "pts has no value\n");
         pkt->pts = pkt->dts;
     }
+    if (trk->start_pts == AV_NOPTS_VALUE) {
+        trk->start_pts = pkt->pts;
+    }
+    trk->track_duration = FFMAX(pkt->pts - trk->start_pts + pkt->duration, trk->track_duration);
+    trk->end_dts = pkt->dts + pkt->duration;
+    trk->last_sample_is_subtitle_end = 0;
+
     if (pkt->dts != pkt->pts)
         trk->flags |= MOV_TRACK_CTTS;
     trk->cluster[trk->entry].cts   = pkt->pts - pkt->dts;
@@ -6295,7 +6298,9 @@  static int mov_init(AVFormatContext *s)
          * this is updated. */
         track->hint_track = -1;
         track->start_dts  = AV_NOPTS_VALUE;
+        track->start_pts  = AV_NOPTS_VALUE;
         track->start_cts  = AV_NOPTS_VALUE;
+        track->end_dts    = AV_NOPTS_VALUE;
         track->end_pts    = AV_NOPTS_VALUE;
         track->dts_shift  = AV_NOPTS_VALUE;
         if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index 68d6f23a5a..ddad2631d7 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -116,7 +116,9 @@  typedef struct MOVTrack {
     uint32_t    tref_tag;
     int         tref_id; ///< trackID of the referenced track
     int64_t     start_dts;
+    int64_t     start_pts;
     int64_t     start_cts;
+    int64_t     end_dts;
     int64_t     end_pts;
     int         end_reliable;
     int64_t     dts_shift;
diff --git a/tests/ref/fate/movenc b/tests/ref/fate/movenc
index 5e8f324ea3..e5395aa521 100644
--- a/tests/ref/fate/movenc
+++ b/tests/ref/fate/movenc
@@ -12,7 +12,7 @@  write_data len 36, time nopts, type header atom ftyp
 write_data len 2629, time nopts, type header atom -
 write_data len 908, time 1000000, type sync atom moof
 write_data len 110, time nopts, type trailer atom -
-c184e168ac1e5bb3d9c70e580ab6179c 3683 non-empty-moov-no-elst
+be2119166851ee710a0cd96f5c3755a6 3683 non-empty-moov-no-elst
 write_data len 20, time nopts, type header atom ftyp
 write_data len 1171, time nopts, type header atom -
 write_data len 728, time 0, type sync atom moof
@@ -108,8 +108,8 @@  c3681590a292cb9ca19a5a982e530166 1219 delay-moov-elst-signal-init-discont
 write_data len 996, time 966667, type sync atom sidx
 aa5462cc0d2144f72154d9c309edb57d 996 delay-moov-elst-signal-second-frag-discont
 write_data len 110, time nopts, type trailer atom -
-write_data len 1243, time nopts, type header atom ftyp
-dac14c8795d5cbd91ae770c6e2880c62 1243 delay-moov-elst-signal-init-discont-largets
+write_data len 1219, time nopts, type header atom ftyp
+c3681590a292cb9ca19a5a982e530166 1219 delay-moov-elst-signal-init-discont-largets
 write_data len 996, time 279621233333, type sync atom sidx
 41cac4c3df656a87bb38363fdcd745e6 996 delay-moov-elst-signal-second-frag-discont-largets
 write_data len 110, time nopts, type trailer atom -
@@ -119,10 +119,10 @@  write_data len 996, time 5166667, type sync atom sidx
 write_data len 148, time nopts, type trailer atom -
 f12d4a0e054abcc508cc0d28cb320e57 4935 vfr
 write_data len 1219, time nopts, type header atom ftyp
-write_data len 2572, time -333333, type sync atom sidx
-write_data len 996, time 5166667, type sync atom sidx
+write_data len 3472, time -333333, type sync atom sidx
+write_data len 1460, time 5133333, type sync atom sidx
 write_data len 148, time nopts, type trailer atom -
-f12d4a0e054abcc508cc0d28cb320e57 4935 vfr-noduration
+202e3030545f4237eea43efba08f9f88 6299 vfr-noduration
 write_data len 1231, time nopts, type header atom ftyp
 write_data len 1500, time -333333, type sync atom moof
 write_data len 1500, time nopts, type unknown atom -
@@ -133,13 +133,13 @@  write_data len 1004, time nopts, type unknown atom -
 write_data len 148, time nopts, type trailer atom -
 3c2c3f98c8a047f0ecefff07570fd457 9299 large_frag
 write_data len 1231, time nopts, type header atom ftyp
-write_data len 684, time -33333, type sync atom moof
-write_data len 504, time 800000, type boundary atom moof
-write_data len 420, time 1266667, type boundary atom moof
+write_data len 620, time -33333, type sync atom moof
+write_data len 432, time 500000, type boundary atom moof
+write_data len 480, time 1166667, type boundary atom moof
 write_data len 668, time 1566667, type sync atom moof
 write_data len 440, time 2233333, type boundary atom moof
 write_data len 262, time nopts, type trailer atom -
-edd19deae2b70afcf2cd744b89b7013d 4209 vfr-noduration-interleave
+2e93b0e5027b4e27a0dd23e9f749a84d 4133 vfr-noduration-interleave
 write_data len 1231, time nopts, type header atom ftyp
 write_data len 916, time 0, type sync atom moof
 write_data len 908, time 1000000, type sync atom moof
diff --git a/tests/ref/lavf/mov b/tests/ref/lavf/mov
index 75a0c4892d..13ab0f4e8f 100644
--- a/tests/ref/lavf/mov
+++ b/tests/ref/lavf/mov
@@ -1,7 +1,7 @@ 
-11bd76730274924e02623172b82b5236 *tests/data/lavf/lavf.mov
+b5bc7dbee83ca51ce0ec3270ff4cb68a *tests/data/lavf/lavf.mov
 357539 tests/data/lavf/lavf.mov
 tests/data/lavf/lavf.mov CRC=0xbb2b949b
-6efa586655e3db043cb29668f5216610 *tests/data/lavf/lavf.mov
+5210f9308b6d26d1eb6c1e4584879bd7 *tests/data/lavf/lavf.mov
 366621 tests/data/lavf/lavf.mov
 tests/data/lavf/lavf.mov CRC=0xa9793231
 c80c625ded376602e71d5aa6ac6fdb1c *tests/data/lavf/lavf.mov
diff --git a/tests/ref/lavf/mp4 b/tests/ref/lavf/mp4
index 8482812380..0275ace9aa 100644
--- a/tests/ref/lavf/mp4
+++ b/tests/ref/lavf/mp4
@@ -1,7 +1,7 @@ 
-ebca72c186a4f3ba9bb17d9cb5b74fef *tests/data/lavf/lavf.mp4
+e469124650c123ad87974930b8faae2e *tests/data/lavf/lavf.mp4
 312457 tests/data/lavf/lavf.mp4
 tests/data/lavf/lavf.mp4 CRC=0x9d9a638a
-9944512475d82d2d601f3c96101bdf9c *tests/data/lavf/lavf.mp4
+dd1c6780572514682205ec9f3681a9a2 *tests/data/lavf/lavf.mp4
 321343 tests/data/lavf/lavf.mp4
 tests/data/lavf/lavf.mp4 CRC=0xe8130120
 7b3e71f294901067046c09f03a426bdc *tests/data/lavf/lavf.mp4