diff mbox series

[FFmpeg-devel,3/3] lavf/dv: use a more granular timebase for audio

Message ID 20230424155553.24560-3-anton@khirnov.net
State New
Headers show
Series [FFmpeg-devel,1/3] lavf/dv: do not set video timebase more than once | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Anton Khirnov April 24, 2023, 3:55 p.m. UTC
One that is fine enough to represent all DV audio sample rates. Audio
packet durations are now sample-accurate.

This largely undoes commit 76fbb0052df471075858c1cb82b04c8be7adba8d. To
avoid reintroducing the issue fixed by that commit, resync audio
timestamps against video if they get one frame or more apart. The sample
from issue #8762 still works correctly after this commit.

Slightly changes the results of the lavf-dv seektest, due to the audio
timebase being more granular.
---
CC-ing Dave, as he reported issue #8762. The sample from there still
seems to work fine, but please let me know if you notice any other
issues.
---
 libavcodec/dv.h        |  3 +++
 libavformat/dv.c       | 30 ++++++++++++++++++++++++++----
 tests/ref/seek/lavf-dv | 16 ++++++++--------
 3 files changed, 37 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/dv.h b/libavcodec/dv.h
index b473bdc992d..abff9f1ea9a 100644
--- a/libavcodec/dv.h
+++ b/libavcodec/dv.h
@@ -63,6 +63,9 @@  enum DVPackType {
 // LCM of video framerate numerators
 #define DV_TIMESCALE_VIDEO 60000
 
+// LCM of audio sample rates
+#define DV_TIMESCALE_AUDIO 14112000
+
 /**
  * maximum number of blocks per macroblock in any DV format
  */
diff --git a/libavformat/dv.c b/libavformat/dv.c
index 49c4f421fa5..41e94a87e75 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -71,6 +71,7 @@  struct DVDemuxContext {
     int               frames;
 
     int64_t           next_pts_video;
+    int64_t           next_pts_audio;
 };
 
 static inline uint16_t dv_audio_12to16(uint16_t sample)
@@ -282,7 +283,7 @@  static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame)
             if (!c->ast[i])
                 return AVERROR(ENOMEM);
 
-            avpriv_set_pts_info(c->ast[i], 64, c->sys->time_base.num, c->sys->time_base.den);
+            avpriv_set_pts_info(c->ast[i], 64, 1, DV_TIMESCALE_AUDIO);
             c->ast[i]->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
             c->ast[i]->codecpar->codec_id   = AV_CODEC_ID_PCM_S16LE;
             c->ast[i]->codecpar->ch_layout  = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
@@ -421,6 +422,7 @@  int avpriv_dv_get_packet(DVDemuxContext *c, AVPacket *pkt)
 int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
                              uint8_t *buf, int buf_size, int64_t pos)
 {
+    int64_t pts, duration;
     int size, i;
     uint8_t *ppcm[5] = { 0 };
 
@@ -436,13 +438,30 @@  int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
     if (size < 0)
         return size;
 
+    if (c->ach) {
+        int64_t next_pts_video = av_rescale_q(c->next_pts_video, c->vst->time_base,
+                                              c->ast[0]->time_base);
+
+        duration = av_rescale_q(size / 4,
+                                (AVRational){ 1, c->audio_pkt[0].sample_rate },
+                                c->ast[0]->time_base);
+
+        // if audio timestamps are one frame or more away from video,
+        // assume desync happened (e.g. due to dropped audio frames) and
+        // resynchronize
+        pts = (FFABS(next_pts_video - c->next_pts_audio) >= duration) ?
+              next_pts_video : c->next_pts_audio;
+
+        c->next_pts_audio = pts + duration;
+    }
+
     for (i = 0; i < c->ach; i++) {
         DVPacket *dpkt = &c->audio_pkt[i];
 
         dpkt->pos      = pos;
         dpkt->size     = size;
-        dpkt->pts      = (c->sys->height == 720) ? (c->frames & ~1) : c->frames;
-        dpkt->duration = 1;
+        dpkt->pts      = pts;
+        dpkt->duration = duration;
 
         ppcm[i] = c->audio_buf[i];
     }
@@ -503,8 +522,11 @@  static int64_t dv_frame_offset(AVFormatContext *s, DVDemuxContext *c,
 void ff_dv_offset_reset(DVDemuxContext *c, int64_t frame_offset)
 {
     c->frames = frame_offset;
-    if (c->sys)
+    if (c->sys) {
         c->next_pts_video = av_rescale_q(frame_offset, c->sys->time_base, c->vst->time_base);
+        if (c->ast[0])
+            c->next_pts_audio = av_rescale_q(frame_offset, c->sys->time_base, c->ast[0]->time_base);
+    }
     c->audio_pkt[0].size = c->audio_pkt[1].size = 0;
     c->audio_pkt[2].size = c->audio_pkt[3].size = 0;
 }
diff --git a/tests/ref/seek/lavf-dv b/tests/ref/seek/lavf-dv
index 95f19d28f03..db46c7734c2 100644
--- a/tests/ref/seek/lavf-dv
+++ b/tests/ref/seek/lavf-dv
@@ -7,9 +7,9 @@  ret: 0         st: 0 flags:0  ts: 0.788333
 ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:2880000 size:144000
 ret: 0         st: 0 flags:1  ts:-0.317500
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:144000
-ret: 0         st: 1 flags:0  ts: 2.560000
+ret: 0         st: 1 flags:0  ts: 2.576668
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
-ret: 0         st: 1 flags:1  ts: 1.480000
+ret: 0         st: 1 flags:1  ts: 1.470835
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
 ret: 0         st:-1 flags:0  ts: 0.365002
 ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1296000 size:144000
@@ -19,9 +19,9 @@  ret: 0         st: 0 flags:0  ts: 2.153333
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
 ret: 0         st: 0 flags:1  ts: 1.047500
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
-ret: 0         st: 1 flags:0  ts:-0.040000
+ret: 0         st: 1 flags:0  ts:-0.058330
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:144000
-ret: 0         st: 1 flags:1  ts: 2.840000
+ret: 0         st: 1 flags:1  ts: 2.835837
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
 ret: 0         st:-1 flags:0  ts: 1.730004
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
@@ -31,9 +31,9 @@  ret: 0         st: 0 flags:0  ts:-0.481667
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:144000
 ret: 0         st: 0 flags:1  ts: 2.412500
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
-ret: 0         st: 1 flags:0  ts: 1.320000
+ret: 0         st: 1 flags:0  ts: 1.306672
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
-ret: 0         st: 1 flags:1  ts: 0.200000
+ret: 0         st: 1 flags:1  ts: 0.200839
 ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos: 720000 size:144000
 ret: 0         st:-1 flags:0  ts:-0.904994
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:144000
@@ -43,9 +43,9 @@  ret: 0         st: 0 flags:0  ts: 0.883333
 ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:3168000 size:144000
 ret: 0         st: 0 flags:1  ts:-0.222500
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:      0 size:144000
-ret: 0         st: 1 flags:0  ts: 2.680000
+ret: 0         st: 1 flags:0  ts: 2.671674
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
-ret: 0         st: 1 flags:1  ts: 1.560000
+ret: 0         st: 1 flags:1  ts: 1.565841
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
 ret: 0         st:-1 flags:0  ts: 0.460008
 ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:1728000 size:144000