diff mbox series

[FFmpeg-devel] avformat/dv: always set audio packet duration

Message ID 20210906175136.15258-1-onemda@gmail.com
State Accepted
Commit d1971d69c749bce315788376c94f9464860f115f
Headers show
Series [FFmpeg-devel] avformat/dv: always set audio packet duration | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Paul B Mahol Sept. 6, 2021, 5:51 p.m. UTC
If audio packet is present in DV stream it have duration of 1 in DV timebase units.

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavformat/dv.c | 4 ++++
 1 file changed, 4 insertions(+)

Comments

Andreas Rheinhardt Sept. 6, 2021, 6:28 p.m. UTC | #1
Paul B Mahol:
> If audio packet is present in DV stream it have duration of 1 in DV timebase units.
> 
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavformat/dv.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/libavformat/dv.c b/libavformat/dv.c
> index d7909683c3..b2b74162df 100644
> --- a/libavformat/dv.c
> +++ b/libavformat/dv.c
> @@ -48,6 +48,7 @@ struct DVPacket {
>      int      stream_index;
>      int      flags;
>      int64_t  pos;
> +    int64_t  duration;
>  };
>  
>  struct DVDemuxContext {
> @@ -276,6 +277,7 @@ static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame)
>              c->audio_pkt[i].stream_index = c->ast[i]->index;
>              c->audio_pkt[i].flags       |= AV_PKT_FLAG_KEY;
>              c->audio_pkt[i].pts          = AV_NOPTS_VALUE;
> +            c->audio_pkt[i].duration     = 0;
>              c->audio_pkt[i].pos          = -1;
>          }
>          c->ast[i]->codecpar->sample_rate    = dv_audio_frequency[freq];
> @@ -374,6 +376,7 @@ int avpriv_dv_get_packet(DVDemuxContext *c, AVPacket *pkt)
>              pkt->stream_index = c->audio_pkt[i].stream_index;
>              pkt->flags        = c->audio_pkt[i].flags;
>              pkt->pts          = c->audio_pkt[i].pts;
> +            pkt->duration     = c->audio_pkt[i].duration;
>              pkt->pos          = c->audio_pkt[i].pos;
>  
>              c->audio_pkt[i].size = 0;
> @@ -404,6 +407,7 @@ int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
>          c->audio_pkt[i].pos  = pos;
>          c->audio_pkt[i].size = size;
>          c->audio_pkt[i].pts  = (c->sys->height == 720) ? (c->frames & ~1) : c->frames;
> +        c->audio_pkt[i].duration = 1;
>          ppcm[i] = c->audio_buf[i];
>      }
>      if (c->ach)
> 
Sure about that? According to the code, the packets contain slightly
different amounts of samples; see dv_extract_audio_info(), in particular
lines 242 and 289.
(It seems to me that one audio packet is supposed to contain the audio
for one video frame, yet the standard sample rates and standard video
framerates (in particular NTSC) are incompatible, so a slight variation
in the number of samples per frame is used.)
IMO one should set the audio timebase to be the inverse of the
samplerate and set the duration based upon the number of samples. But I
have zero experience with dv.

- Andreas
Paul B Mahol Sept. 6, 2021, 6:37 p.m. UTC | #2
On Mon, Sep 6, 2021 at 8:29 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Paul B Mahol:
> > If audio packet is present in DV stream it have duration of 1 in DV
> timebase units.
> >
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
> >  libavformat/dv.c | 4 ++++
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/libavformat/dv.c b/libavformat/dv.c
> > index d7909683c3..b2b74162df 100644
> > --- a/libavformat/dv.c
> > +++ b/libavformat/dv.c
> > @@ -48,6 +48,7 @@ struct DVPacket {
> >      int      stream_index;
> >      int      flags;
> >      int64_t  pos;
> > +    int64_t  duration;
> >  };
> >
> >  struct DVDemuxContext {
> > @@ -276,6 +277,7 @@ static int dv_extract_audio_info(DVDemuxContext *c,
> const uint8_t *frame)
> >              c->audio_pkt[i].stream_index = c->ast[i]->index;
> >              c->audio_pkt[i].flags       |= AV_PKT_FLAG_KEY;
> >              c->audio_pkt[i].pts          = AV_NOPTS_VALUE;
> > +            c->audio_pkt[i].duration     = 0;
> >              c->audio_pkt[i].pos          = -1;
> >          }
> >          c->ast[i]->codecpar->sample_rate    = dv_audio_frequency[freq];
> > @@ -374,6 +376,7 @@ int avpriv_dv_get_packet(DVDemuxContext *c, AVPacket
> *pkt)
> >              pkt->stream_index = c->audio_pkt[i].stream_index;
> >              pkt->flags        = c->audio_pkt[i].flags;
> >              pkt->pts          = c->audio_pkt[i].pts;
> > +            pkt->duration     = c->audio_pkt[i].duration;
> >              pkt->pos          = c->audio_pkt[i].pos;
> >
> >              c->audio_pkt[i].size = 0;
> > @@ -404,6 +407,7 @@ int avpriv_dv_produce_packet(DVDemuxContext *c,
> AVPacket *pkt,
> >          c->audio_pkt[i].pos  = pos;
> >          c->audio_pkt[i].size = size;
> >          c->audio_pkt[i].pts  = (c->sys->height == 720) ? (c->frames &
> ~1) : c->frames;
> > +        c->audio_pkt[i].duration = 1;
> >          ppcm[i] = c->audio_buf[i];
> >      }
> >      if (c->ach)
> >
> Sure about that? According to the code, the packets contain slightly
> different amounts of samples; see dv_extract_audio_info(), in particular
> lines 242 and 289.
> (It seems to me that one audio packet is supposed to contain the audio
> for one video frame, yet the standard sample rates and standard video
> framerates (in particular NTSC) are incompatible, so a slight variation
> in the number of samples per frame is used.)
> IMO one should set the audio timebase to be the inverse of the
> samplerate and set the duration based upon the number of samples. But I
> have zero experience with dv.
>

I had exact same thoughts, but DV have timestamps actually, and timestamps
are in different units than sample rate.
And audio timestamps are linked to video timestamps.


>
> - Andreas
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Dave Rice Sept. 6, 2021, 8:07 p.m. UTC | #3
> On Sep 6, 2021, at 2:28 PM, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:
> 
> Paul B Mahol:
>> If audio packet is present in DV stream it have duration of 1 in DV timebase units.
>> 
>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> ---
>> libavformat/dv.c | 4 ++++
>> 1 file changed, 4 insertions(+)
>> 
>> diff --git a/libavformat/dv.c b/libavformat/dv.c
>> index d7909683c3..b2b74162df 100644
>> --- a/libavformat/dv.c
>> +++ b/libavformat/dv.c
>> @@ -48,6 +48,7 @@ struct DVPacket {
>>     int      stream_index;
>>     int      flags;
>>     int64_t  pos;
>> +    int64_t  duration;
>> };
>> 
>> struct DVDemuxContext {
>> @@ -276,6 +277,7 @@ static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame)
>>             c->audio_pkt[i].stream_index = c->ast[i]->index;
>>             c->audio_pkt[i].flags       |= AV_PKT_FLAG_KEY;
>>             c->audio_pkt[i].pts          = AV_NOPTS_VALUE;
>> +            c->audio_pkt[i].duration     = 0;
>>             c->audio_pkt[i].pos          = -1;
>>         }
>>         c->ast[i]->codecpar->sample_rate    = dv_audio_frequency[freq];
>> @@ -374,6 +376,7 @@ int avpriv_dv_get_packet(DVDemuxContext *c, AVPacket *pkt)
>>             pkt->stream_index = c->audio_pkt[i].stream_index;
>>             pkt->flags        = c->audio_pkt[i].flags;
>>             pkt->pts          = c->audio_pkt[i].pts;
>> +            pkt->duration     = c->audio_pkt[i].duration;
>>             pkt->pos          = c->audio_pkt[i].pos;
>> 
>>             c->audio_pkt[i].size = 0;
>> @@ -404,6 +407,7 @@ int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
>>         c->audio_pkt[i].pos  = pos;
>>         c->audio_pkt[i].size = size;
>>         c->audio_pkt[i].pts  = (c->sys->height == 720) ? (c->frames & ~1) : c->frames;
>> +        c->audio_pkt[i].duration = 1;
>>         ppcm[i] = c->audio_buf[i];
>>     }
>>     if (c->ach)
>> 
> Sure about that? According to the code, the packets contain slightly
> different amounts of samples; see dv_extract_audio_info(), in particular
> lines 242 and 289.
> (It seems to me that one audio packet is supposed to contain the audio
> for one video frame, yet the standard sample rates and standard video
> framerates (in particular NTSC) are incompatible, so a slight variation
> in the number of samples per frame is used.)
> IMO one should set the audio timebase to be the inverse of the
> samplerate and set the duration based upon the number of samples. But I
> have zero experience with dv.

Yes the audio duration is variable in NTSC DV, see page 17 of http://web.archive.org/web/20060927044735/http://www.smpte.org/smpte_store/standards/pdf/s314m.pdf. Each frame has either 1600 or 1602 samples, so while the video is 1001/30000 the audio is either 1600/48000 or 1602/48000. So video at ~0.03336666667 and audio at either ~0.03333333333 or 0.033375.

I’ve tested this patch and it does correct some errors. For instance, before this patch frame 3 of the output only offers half of the needed duration, so the resulting audio track is (1001/30000)/2 shorter than the video. But after the patch, the video and audio outputs have the same length.

ffmpeg -i https://archive.org/download/test_a_202108/TESTTHIS.dv -filter_complex "[0:a:0]aresample=async=1:min_hard_comp=0.01[aud]" -c:v:0 copy -map 0:v:0 -c:a pcm_s16le -map "[aud]” output.mov

Dave Rice
Paul B Mahol Sept. 12, 2021, 6:25 p.m. UTC | #4
will apply soon
diff mbox series

Patch

diff --git a/libavformat/dv.c b/libavformat/dv.c
index d7909683c3..b2b74162df 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -48,6 +48,7 @@  struct DVPacket {
     int      stream_index;
     int      flags;
     int64_t  pos;
+    int64_t  duration;
 };
 
 struct DVDemuxContext {
@@ -276,6 +277,7 @@  static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame)
             c->audio_pkt[i].stream_index = c->ast[i]->index;
             c->audio_pkt[i].flags       |= AV_PKT_FLAG_KEY;
             c->audio_pkt[i].pts          = AV_NOPTS_VALUE;
+            c->audio_pkt[i].duration     = 0;
             c->audio_pkt[i].pos          = -1;
         }
         c->ast[i]->codecpar->sample_rate    = dv_audio_frequency[freq];
@@ -374,6 +376,7 @@  int avpriv_dv_get_packet(DVDemuxContext *c, AVPacket *pkt)
             pkt->stream_index = c->audio_pkt[i].stream_index;
             pkt->flags        = c->audio_pkt[i].flags;
             pkt->pts          = c->audio_pkt[i].pts;
+            pkt->duration     = c->audio_pkt[i].duration;
             pkt->pos          = c->audio_pkt[i].pos;
 
             c->audio_pkt[i].size = 0;
@@ -404,6 +407,7 @@  int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
         c->audio_pkt[i].pos  = pos;
         c->audio_pkt[i].size = size;
         c->audio_pkt[i].pts  = (c->sys->height == 720) ? (c->frames & ~1) : c->frames;
+        c->audio_pkt[i].duration = 1;
         ppcm[i] = c->audio_buf[i];
     }
     if (c->ach)