diff mbox series

[FFmpeg-devel] avformat/pcmdec: add pts and dts calculation for pcmdec

Message ID 20240514113314.1821249-1-hiccupzhu@gmail.com
State New
Headers show
Series [FFmpeg-devel] avformat/pcmdec: add pts and dts calculation for pcmdec | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed

Commit Message

Shiqi Zhu May 14, 2024, 11:33 a.m. UTC
Signed-off-by: Shiqi Zhu <hiccupzhu@gmail.com>
---
 libavformat/pcmdec.c | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

Comments

Andreas Rheinhardt May 14, 2024, 11:40 a.m. UTC | #1
Shiqi Zhu:
> Signed-off-by: Shiqi Zhu <hiccupzhu@gmail.com>
> ---
>  libavformat/pcmdec.c | 37 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 35 insertions(+), 2 deletions(-)
> 
> diff --git a/libavformat/pcmdec.c b/libavformat/pcmdec.c
> index 2f6508b75a..d879aefaad 100644
> --- a/libavformat/pcmdec.c
> +++ b/libavformat/pcmdec.c
> @@ -36,6 +36,7 @@ typedef struct PCMAudioDemuxerContext {
>      AVClass *class;
>      int sample_rate;
>      AVChannelLayout ch_layout;
> +    int64_t nb_samples;
>  } PCMAudioDemuxerContext;
>  
>  static int pcm_read_header(AVFormatContext *s)
> @@ -46,6 +47,7 @@ static int pcm_read_header(AVFormatContext *s)
>      uint8_t *mime_type = NULL;
>      int ret;
>  
> +    s1->nb_samples = 0;
>      st = avformat_new_stream(s, NULL);
>      if (!st)
>          return AVERROR(ENOMEM);
> @@ -104,6 +106,37 @@ static int pcm_read_header(AVFormatContext *s)
>      return 0;
>  }
>  
> +static int pcm_dec_read_packet(AVFormatContext *s, AVPacket *pkt)
> +{
> +    PCMAudioDemuxerContext *s1 = s->priv_data;
> +    AVCodecParameters *par = s->streams[0]->codecpar;
> +    int ret;
> +
> +    ret = ff_pcm_read_packet(s, pkt);
> +    if (ret < 0)
> +        return ret;
> +
> +    pkt->time_base = s->streams[0]->time_base;
> +    pkt->dts = pkt->pts = s1->nb_samples;
> +    s1->nb_samples += pkt->size / par->block_align;
> +
> +    return ret;
> +}
> +
> +static int pcm_dec_read_seek(AVFormatContext *s,
> +                             int stream_index, int64_t timestamp, int flags)
> +{
> +    PCMAudioDemuxerContext *s1 = s->priv_data;
> +    int ret;
> +
> +    ret = ff_pcm_read_seek(s, stream_index, timestamp, flags);
> +    if (ret < 0)
> +        return ret;
> +
> +    s1->nb_samples = ffstream(s->streams[0])->cur_dts;
> +    return ret;
> +}
> +
>  static const AVOption pcm_options[] = {
>      { "sample_rate", "", offsetof(PCMAudioDemuxerContext, sample_rate), AV_OPT_TYPE_INT, {.i64 = 44100}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
>      { "ch_layout",   "", offsetof(PCMAudioDemuxerContext, ch_layout),   AV_OPT_TYPE_CHLAYOUT, {.str = "mono"}, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
> @@ -126,8 +159,8 @@ const FFInputFormat ff_pcm_ ## name_ ## _demuxer = {        \
>      .p.priv_class   = &pcm_demuxer_class,                   \
>      .priv_data_size = sizeof(PCMAudioDemuxerContext),       \
>      .read_header    = pcm_read_header,                      \
> -    .read_packet    = ff_pcm_read_packet,                   \
> -    .read_seek      = ff_pcm_read_seek,                     \
> +    .read_packet    = pcm_dec_read_packet,                  \
> +    .read_seek      = pcm_dec_read_seek,                    \
>      .raw_codec_id   = codec,                                \
>      __VA_ARGS__                                             \
>  };

A quick test shows that PTS and DTS are already set generically for pcm
formats (unless the AVFMT_FLAG_NOFILLIN flag is set). If they are not set in
your use case, then you should provide details about this (preferably by
opening a ticket on trac).

- Andreas
Shiqi Zhu May 15, 2024, 1:32 a.m. UTC | #2
The purpose of this patch is to calculate pts and dts when using pcmdemux.
Is there anything wrong with doing this, or do you have any suggestions for
improvement?

Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 于2024年5月14日周二 19:41写道:

> Shiqi Zhu:
> > Signed-off-by: Shiqi Zhu <hiccupzhu@gmail.com>
> > ---
> >  libavformat/pcmdec.c | 37 +++++++++++++++++++++++++++++++++++--
> >  1 file changed, 35 insertions(+), 2 deletions(-)
> >
> > diff --git a/libavformat/pcmdec.c b/libavformat/pcmdec.c
> > index 2f6508b75a..d879aefaad 100644
> > --- a/libavformat/pcmdec.c
> > +++ b/libavformat/pcmdec.c
> > @@ -36,6 +36,7 @@ typedef struct PCMAudioDemuxerContext {
> >      AVClass *class;
> >      int sample_rate;
> >      AVChannelLayout ch_layout;
> > +    int64_t nb_samples;
> >  } PCMAudioDemuxerContext;
> >
> >  static int pcm_read_header(AVFormatContext *s)
> > @@ -46,6 +47,7 @@ static int pcm_read_header(AVFormatContext *s)
> >      uint8_t *mime_type = NULL;
> >      int ret;
> >
> > +    s1->nb_samples = 0;
> >      st = avformat_new_stream(s, NULL);
> >      if (!st)
> >          return AVERROR(ENOMEM);
> > @@ -104,6 +106,37 @@ static int pcm_read_header(AVFormatContext *s)
> >      return 0;
> >  }
> >
> > +static int pcm_dec_read_packet(AVFormatContext *s, AVPacket *pkt)
> > +{
> > +    PCMAudioDemuxerContext *s1 = s->priv_data;
> > +    AVCodecParameters *par = s->streams[0]->codecpar;
> > +    int ret;
> > +
> > +    ret = ff_pcm_read_packet(s, pkt);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    pkt->time_base = s->streams[0]->time_base;
> > +    pkt->dts = pkt->pts = s1->nb_samples;
> > +    s1->nb_samples += pkt->size / par->block_align;
> > +
> > +    return ret;
> > +}
> > +
> > +static int pcm_dec_read_seek(AVFormatContext *s,
> > +                             int stream_index, int64_t timestamp, int
> flags)
> > +{
> > +    PCMAudioDemuxerContext *s1 = s->priv_data;
> > +    int ret;
> > +
> > +    ret = ff_pcm_read_seek(s, stream_index, timestamp, flags);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    s1->nb_samples = ffstream(s->streams[0])->cur_dts;
> > +    return ret;
> > +}
> > +
> >  static const AVOption pcm_options[] = {
> >      { "sample_rate", "", offsetof(PCMAudioDemuxerContext, sample_rate),
> AV_OPT_TYPE_INT, {.i64 = 44100}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
> >      { "ch_layout",   "", offsetof(PCMAudioDemuxerContext, ch_layout),
>  AV_OPT_TYPE_CHLAYOUT, {.str = "mono"}, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
> > @@ -126,8 +159,8 @@ const FFInputFormat ff_pcm_ ## name_ ## _demuxer =
> {        \
> >      .p.priv_class   = &pcm_demuxer_class,                   \
> >      .priv_data_size = sizeof(PCMAudioDemuxerContext),       \
> >      .read_header    = pcm_read_header,                      \
> > -    .read_packet    = ff_pcm_read_packet,                   \
> > -    .read_seek      = ff_pcm_read_seek,                     \
> > +    .read_packet    = pcm_dec_read_packet,                  \
> > +    .read_seek      = pcm_dec_read_seek,                    \
> >      .raw_codec_id   = codec,                                \
> >      __VA_ARGS__                                             \
> >  };
>
> A quick test shows that PTS and DTS are already set generically for pcm
> formats (unless the AVFMT_FLAG_NOFILLIN flag is set). If it is not in
> your usecase, then you should provide details about this (preferably by
> opening a ticket on trac).
>
> - Andreas
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Andreas Rheinhardt May 15, 2024, 1:39 a.m. UTC | #3
Hiccup Zhu:
> The purpose of this patch is to calculate pts and dts when using pcmdemux.
> Is there anything wrong with doing this, or do you have any suggestions for
> improvement?
> 

1. Don't top-post on this list.
2. PTS and DTS are already produced with this demuxer. As has been said:
If it isn't for you, open a ticket about it.

> Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 于2024年5月14日周二 19:41写道:
> 
>> Shiqi Zhu:
>>> Signed-off-by: Shiqi Zhu <hiccupzhu@gmail.com>
>>> ---
>>>  libavformat/pcmdec.c | 37 +++++++++++++++++++++++++++++++++++--
>>>  1 file changed, 35 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/libavformat/pcmdec.c b/libavformat/pcmdec.c
>>> index 2f6508b75a..d879aefaad 100644
>>> --- a/libavformat/pcmdec.c
>>> +++ b/libavformat/pcmdec.c
>>> @@ -36,6 +36,7 @@ typedef struct PCMAudioDemuxerContext {
>>>      AVClass *class;
>>>      int sample_rate;
>>>      AVChannelLayout ch_layout;
>>> +    int64_t nb_samples;
>>>  } PCMAudioDemuxerContext;
>>>
>>>  static int pcm_read_header(AVFormatContext *s)
>>> @@ -46,6 +47,7 @@ static int pcm_read_header(AVFormatContext *s)
>>>      uint8_t *mime_type = NULL;
>>>      int ret;
>>>
>>> +    s1->nb_samples = 0;
>>>      st = avformat_new_stream(s, NULL);
>>>      if (!st)
>>>          return AVERROR(ENOMEM);
>>> @@ -104,6 +106,37 @@ static int pcm_read_header(AVFormatContext *s)
>>>      return 0;
>>>  }
>>>
>>> +static int pcm_dec_read_packet(AVFormatContext *s, AVPacket *pkt)
>>> +{
>>> +    PCMAudioDemuxerContext *s1 = s->priv_data;
>>> +    AVCodecParameters *par = s->streams[0]->codecpar;
>>> +    int ret;
>>> +
>>> +    ret = ff_pcm_read_packet(s, pkt);
>>> +    if (ret < 0)
>>> +        return ret;
>>> +
>>> +    pkt->time_base = s->streams[0]->time_base;
>>> +    pkt->dts = pkt->pts = s1->nb_samples;
>>> +    s1->nb_samples += pkt->size / par->block_align;
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static int pcm_dec_read_seek(AVFormatContext *s,
>>> +                             int stream_index, int64_t timestamp, int
>> flags)
>>> +{
>>> +    PCMAudioDemuxerContext *s1 = s->priv_data;
>>> +    int ret;
>>> +
>>> +    ret = ff_pcm_read_seek(s, stream_index, timestamp, flags);
>>> +    if (ret < 0)
>>> +        return ret;
>>> +
>>> +    s1->nb_samples = ffstream(s->streams[0])->cur_dts;
>>> +    return ret;
>>> +}
>>> +
>>>  static const AVOption pcm_options[] = {
>>>      { "sample_rate", "", offsetof(PCMAudioDemuxerContext, sample_rate),
>> AV_OPT_TYPE_INT, {.i64 = 44100}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
>>>      { "ch_layout",   "", offsetof(PCMAudioDemuxerContext, ch_layout),
>>  AV_OPT_TYPE_CHLAYOUT, {.str = "mono"}, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
>>> @@ -126,8 +159,8 @@ const FFInputFormat ff_pcm_ ## name_ ## _demuxer =
>> {        \
>>>      .p.priv_class   = &pcm_demuxer_class,                   \
>>>      .priv_data_size = sizeof(PCMAudioDemuxerContext),       \
>>>      .read_header    = pcm_read_header,                      \
>>> -    .read_packet    = ff_pcm_read_packet,                   \
>>> -    .read_seek      = ff_pcm_read_seek,                     \
>>> +    .read_packet    = pcm_dec_read_packet,                  \
>>> +    .read_seek      = pcm_dec_read_seek,                    \
>>>      .raw_codec_id   = codec,                                \
>>>      __VA_ARGS__                                             \
>>>  };
>>
>> A quick test shows that PTS and DTS are already set generically for pcm
>> formats (unless the AVFMT_FLAG_NOFILLIN flag is set). If it is not in
>> your usecase, then you should provide details about this (preferably by
>> opening a ticket on trac).
>>
>> - Andreas
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
> 
>
Shiqi Zhu May 15, 2024, 11:48 a.m. UTC | #4
Andreas:
>
> Hiccup Zhu:
> > The purpose of this patch is to calculate pts and dts when using pcmdemux.
> > Is there anything wrong with doing this, or do you have any suggestions for
> > improvement?
> >
>
> 1. Don't top-post on this list.
> 2. PTS and DTS are already produced with this demuxer. As has been said:
> If it isn't for you, open a ticket about it.

Here is the situation. I found that when opening a pcm file,
avformat_find_stream_info keeps reading packets until their number
exceeds max_ts_probe, and only then exits.
The reason is that when the pcm demuxer is used, no valid pts and dts are
read, so sti->first_pts is never set correctly.
This is unreasonable in some scenarios, because
avformat_find_stream_info spends more time reading packets, which is
especially serious in the case of network streams.

You can reproduce this problem by opening any pcm file.

Based on the above facts, I submitted this patch.
Of course, this problem can also be fixed by correcting how
sti->first_pts is assigned, which is what another patch of mine does:
https://patchwork.ffmpeg.org/project/ffmpeg/patch/20240515113522.1921274-1-hiccupzhu@gmail.com/

- Shiqi

> >
> >> Shiqi Zhu:
> >>> Signed-off-by: Shiqi Zhu <hiccupzhu@gmail.com>
> >>> ---
> >>>  libavformat/pcmdec.c | 37 +++++++++++++++++++++++++++++++++++--
> >>>  1 file changed, 35 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/libavformat/pcmdec.c b/libavformat/pcmdec.c
> >>> index 2f6508b75a..d879aefaad 100644
> >>> --- a/libavformat/pcmdec.c
> >>> +++ b/libavformat/pcmdec.c
> >>> @@ -36,6 +36,7 @@ typedef struct PCMAudioDemuxerContext {
> >>>      AVClass *class;
> >>>      int sample_rate;
> >>>      AVChannelLayout ch_layout;
> >>> +    int64_t nb_samples;
> >>>  } PCMAudioDemuxerContext;
> >>>
> >>>  static int pcm_read_header(AVFormatContext *s)
> >>> @@ -46,6 +47,7 @@ static int pcm_read_header(AVFormatContext *s)
> >>>      uint8_t *mime_type = NULL;
> >>>      int ret;
> >>>
> >>> +    s1->nb_samples = 0;
> >>>      st = avformat_new_stream(s, NULL);
> >>>      if (!st)
> >>>          return AVERROR(ENOMEM);
> >>> @@ -104,6 +106,37 @@ static int pcm_read_header(AVFormatContext *s)
> >>>      return 0;
> >>>  }
> >>>
> >>> +static int pcm_dec_read_packet(AVFormatContext *s, AVPacket *pkt)
> >>> +{
> >>> +    PCMAudioDemuxerContext *s1 = s->priv_data;
> >>> +    AVCodecParameters *par = s->streams[0]->codecpar;
> >>> +    int ret;
> >>> +
> >>> +    ret = ff_pcm_read_packet(s, pkt);
> >>> +    if (ret < 0)
> >>> +        return ret;
> >>> +
> >>> +    pkt->time_base = s->streams[0]->time_base;
> >>> +    pkt->dts = pkt->pts = s1->nb_samples;
> >>> +    s1->nb_samples += pkt->size / par->block_align;
> >>> +
> >>> +    return ret;
> >>> +}
> >>> +
> >>> +static int pcm_dec_read_seek(AVFormatContext *s,
> >>> +                             int stream_index, int64_t timestamp, int
> >> flags)
> >>> +{
> >>> +    PCMAudioDemuxerContext *s1 = s->priv_data;
> >>> +    int ret;
> >>> +
> >>> +    ret = ff_pcm_read_seek(s, stream_index, timestamp, flags);
> >>> +    if (ret < 0)
> >>> +        return ret;
> >>> +
> >>> +    s1->nb_samples = ffstream(s->streams[0])->cur_dts;
> >>> +    return ret;
> >>> +}
> >>> +
> >>>  static const AVOption pcm_options[] = {
> >>>      { "sample_rate", "", offsetof(PCMAudioDemuxerContext, sample_rate),
> >> AV_OPT_TYPE_INT, {.i64 = 44100}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
> >>>      { "ch_layout",   "", offsetof(PCMAudioDemuxerContext, ch_layout),
> >>  AV_OPT_TYPE_CHLAYOUT, {.str = "mono"}, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
> >>> @@ -126,8 +159,8 @@ const FFInputFormat ff_pcm_ ## name_ ## _demuxer =
> >> {        \
> >>>      .p.priv_class   = &pcm_demuxer_class,                   \
> >>>      .priv_data_size = sizeof(PCMAudioDemuxerContext),       \
> >>>      .read_header    = pcm_read_header,                      \
> >>> -    .read_packet    = ff_pcm_read_packet,                   \
> >>> -    .read_seek      = ff_pcm_read_seek,                     \
> >>> +    .read_packet    = pcm_dec_read_packet,                  \
> >>> +    .read_seek      = pcm_dec_read_seek,                    \
> >>>      .raw_codec_id   = codec,                                \
> >>>      __VA_ARGS__                                             \
> >>>  };
> >>
> >> A quick test shows that PTS and DTS are already set generically for pcm
> >> formats (unless the AVFMT_FLAG_NOFILLIN flag is set). If it is not in
> >> your usecase, then you should provide details about this (preferably by
> >> opening a ticket on trac).
> >>
> >> - Andreas
> >>
> >> _______________________________________________
> >> ffmpeg-devel mailing list
> >> ffmpeg-devel@ffmpeg.org
> >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >>
> >> To unsubscribe, visit link above, or email
> >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >>
> >
> >
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavformat/pcmdec.c b/libavformat/pcmdec.c
index 2f6508b75a..d879aefaad 100644
--- a/libavformat/pcmdec.c
+++ b/libavformat/pcmdec.c
@@ -36,6 +36,7 @@  typedef struct PCMAudioDemuxerContext {
     AVClass *class;
     int sample_rate;
     AVChannelLayout ch_layout;
+    int64_t nb_samples;
 } PCMAudioDemuxerContext;
 
 static int pcm_read_header(AVFormatContext *s)
@@ -46,6 +47,7 @@  static int pcm_read_header(AVFormatContext *s)
     uint8_t *mime_type = NULL;
     int ret;
 
+    s1->nb_samples = 0;
     st = avformat_new_stream(s, NULL);
     if (!st)
         return AVERROR(ENOMEM);
@@ -104,6 +106,37 @@  static int pcm_read_header(AVFormatContext *s)
     return 0;
 }
 
+static int pcm_dec_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    PCMAudioDemuxerContext *s1 = s->priv_data;
+    AVCodecParameters *par = s->streams[0]->codecpar;
+    int ret;
+
+    ret = ff_pcm_read_packet(s, pkt);
+    if (ret < 0)
+        return ret;
+
+    pkt->time_base = s->streams[0]->time_base;
+    pkt->dts = pkt->pts = s1->nb_samples;
+    s1->nb_samples += pkt->size / par->block_align;
+
+    return ret;
+}
+
+static int pcm_dec_read_seek(AVFormatContext *s,
+                             int stream_index, int64_t timestamp, int flags)
+{
+    PCMAudioDemuxerContext *s1 = s->priv_data;
+    int ret;
+
+    ret = ff_pcm_read_seek(s, stream_index, timestamp, flags);
+    if (ret < 0)
+        return ret;
+
+    s1->nb_samples = ffstream(s->streams[0])->cur_dts;
+    return ret;
+}
+
 static const AVOption pcm_options[] = {
     { "sample_rate", "", offsetof(PCMAudioDemuxerContext, sample_rate), AV_OPT_TYPE_INT, {.i64 = 44100}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "ch_layout",   "", offsetof(PCMAudioDemuxerContext, ch_layout),   AV_OPT_TYPE_CHLAYOUT, {.str = "mono"}, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
@@ -126,8 +159,8 @@  const FFInputFormat ff_pcm_ ## name_ ## _demuxer = {        \
     .p.priv_class   = &pcm_demuxer_class,                   \
     .priv_data_size = sizeof(PCMAudioDemuxerContext),       \
     .read_header    = pcm_read_header,                      \
-    .read_packet    = ff_pcm_read_packet,                   \
-    .read_seek      = ff_pcm_read_seek,                     \
+    .read_packet    = pcm_dec_read_packet,                  \
+    .read_seek      = pcm_dec_read_seek,                    \
     .raw_codec_id   = codec,                                \
     __VA_ARGS__                                             \
 };