diff mbox

[FFmpeg-devel] Use packet DTS to correct frame PTS for PTS missing video in nvidia cuvid decoder

Message ID df48eff3f27f65766effa2a6b6b6d9@cweb03.nmdf.nhnsystem.com
State Superseded
Headers show

Commit Message

백준식 March 20, 2019, 11:18 p.m. UTC
- Current Status
In the cuvid decoder wrapper, the PTS of each output frame is resolved from the input packet PTS.
When the PTS is missing, the cuvid decoder produces monotonically increasing timestamps with a fixed interval.
This causes no problem when every frame in the video has the same duration, as in fixed frame rate video.

- Problem
For variable frame rate video with missing PTS, however, resolving the PTS from monotonically increasing numbers with a fixed interval does not reflect the actual position of each frame in the time domain.
The test sample is an AVI file without PTS information that has missing frames, which makes it a variable frame rate video. When the cuvid decoder processes this video, the varying frame durations are ignored because the PTS is missing. This directly leads to A/V sync problems and a lot of frame duplication at the end of the video.

- Solution
To correct each frame's duration, the packet DTS is passed through cuvidParseVideoData() so that the output frame duration can be resolved from it.
Since the passed packet DTS is not an actual PTS, the value resolved through CuvidParsedFrame is stored in frame->best_effort_timestamp, as in other decoder wrappers.

Signed-off-by: JoonsikBaek <js100@linecorp.com>
---
 libavcodec/cuviddec.c | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)
diff mbox

Patch

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index 291bb93..c9f0ff1 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -81,6 +81,8 @@  typedef struct CuvidContext
     int internal_error;
     int decoder_flushing;
 
+    int use_best_effort_pts_from_dts;
+
     int *key_frame;
 
     cudaVideoCodec codec_type;
@@ -384,6 +386,7 @@  static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
     AVPacket filter_packet = { 0 };
     AVPacket filtered_packet = { 0 };
     int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
+    int64_t timestamp = AV_NOPTS_VALUE;
 
     av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
 
@@ -426,11 +429,18 @@  static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
         cupkt.payload = avpkt->data;
 
         if (avpkt->pts != AV_NOPTS_VALUE) {
+            timestamp = avpkt->pts;
+        } else if (avpkt->dts != AV_NOPTS_VALUE) {
+            ctx->use_best_effort_pts_from_dts = 1;
+            timestamp = avpkt->dts;
+        }
+
+        if (timestamp != AV_NOPTS_VALUE) {
             cupkt.flags = CUVID_PKT_TIMESTAMP;
             if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
-                cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
+                cupkt.timestamp = av_rescale_q(timestamp, avctx->pkt_timebase, (AVRational){1, 10000000});
             else
-                cupkt.timestamp = avpkt->pts;
+                cupkt.timestamp = timestamp;
         }
     } else {
         cupkt.flags = CUVID_PKT_ENDOFSTREAM;
@@ -506,6 +516,7 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
         unsigned int pitch = 0;
         int offset = 0;
         int i;
+        int64_t timestamp;
 
         av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 
@@ -610,22 +621,29 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
         frame->key_frame = ctx->key_frame[parsed_frame.dispinfo.picture_index];
         frame->width = avctx->width;
         frame->height = avctx->height;
+
         if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
-            frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
+            timestamp = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
         else
-            frame->pts = parsed_frame.dispinfo.timestamp;
+            timestamp = parsed_frame.dispinfo.timestamp;
 
         if (parsed_frame.second_field) {
             if (ctx->prev_pts == INT64_MIN) {
-                ctx->prev_pts = frame->pts;
-                frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
+                ctx->prev_pts = timestamp;
+                timestamp += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
             } else {
-                int pts_diff = (frame->pts - ctx->prev_pts) / 2;
-                ctx->prev_pts = frame->pts;
-                frame->pts += pts_diff;
+                int pts_diff = (timestamp - ctx->prev_pts) / 2;
+                ctx->prev_pts = timestamp;
+                timestamp += pts_diff;
             }
         }
 
+        if(ctx->use_best_effort_pts_from_dts) {
+            frame->best_effort_timestamp = timestamp;
+        } else {
+            frame->pts = timestamp;
+        }
+
         /* CUVIDs opaque reordering breaks the internal pkt logic.
          * So set pkt_pts and clear all the other pkt_ fields.
          */