diff mbox series

[FFmpeg-devel,v2] avformat/wavdec: Fix reading files with id3v2 apic before fmt tag

Message ID HE1PR0301MB215469B69C9D56EFC4F0A1A78F4B9@HE1PR0301MB2154.eurprd03.prod.outlook.com
State Accepted
Commit e1f41b4fdc5d3e8813d8f9d4d81899bd4e6df31b
Headers show
Series [FFmpeg-devel,v2] avformat/wavdec: Fix reading files with id3v2 apic before fmt tag | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Andreas Rheinhardt April 17, 2021, 1:33 a.m. UTC
Up until now the cover images will get the stream index 0 in this case,
violating the hardcoded assumption that this is the index of the audio
stream. Fix this by creating the audio stream first; this is also in
line with the expectations of ff_pcm_read_seek() and
ff_spdif_read_packet(). It also simplifies the code to parse the fmt and
xma2 tags.

Fixes #8540; regression since f5aad350d3695b5b16e7d135154a4c61e4dce9d8.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavformat/wavdec.c | 78 ++++++++++++++++++++++----------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

Comments

Andreas Rheinhardt April 17, 2021, 11:49 p.m. UTC | #1
Andreas Rheinhardt:
> Up until now the cover images will get the stream index 0 in this case,
> violating the hardcoded assumption that this is the index of the audio
> stream. Fix this by creating the audio stream first; this is also in
> line with the expectations of ff_pcm_read_seek() and
> ff_spdif_read_packet(). It also simplifies the code to parse the fmt and
> xma2 tags.
> 
> Fixes #8540; regression since f5aad350d3695b5b16e7d135154a4c61e4dce9d8.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
>  libavformat/wavdec.c | 78 ++++++++++++++++++++++----------------------
>  1 file changed, 39 insertions(+), 39 deletions(-)
> 
> diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c
> index 8214ab8498..791ae23b4a 100644
> --- a/libavformat/wavdec.c
> +++ b/libavformat/wavdec.c
> @@ -49,6 +49,7 @@ typedef struct WAVDemuxContext {
>      const AVClass *class;
>      int64_t data_end;
>      int w64;
> +    AVStream *vst;
>      int64_t smv_data_ofs;
>      int smv_block_size;
>      int smv_frames_per_jpeg;
> @@ -170,30 +171,26 @@ static void handle_stream_probing(AVStream *st)
>      }
>  }
>  
> -static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
> +static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream *st)
>  {
>      AVIOContext *pb = s->pb;
>      WAVDemuxContext *wav = s->priv_data;
>      int ret;
>  
>      /* parse fmt header */
> -    *st = avformat_new_stream(s, NULL);
> -    if (!*st)
> -        return AVERROR(ENOMEM);
> -
> -    ret = ff_get_wav_header(s, pb, (*st)->codecpar, size, wav->rifx);
> +    ret = ff_get_wav_header(s, pb, st->codecpar, size, wav->rifx);
>      if (ret < 0)
>          return ret;
> -    handle_stream_probing(*st);
> +    handle_stream_probing(st);
>  
> -    (*st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> +    st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>  
> -    avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate);
> +    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
>  
>      return 0;
>  }
>  
> -static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
> +static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream *st)
>  {
>      AVIOContext *pb = s->pb;
>      int version, num_streams, i, channels = 0, ret;
> @@ -201,13 +198,9 @@ static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
>      if (size < 36)
>          return AVERROR_INVALIDDATA;
>  
> -    *st = avformat_new_stream(s, NULL);
> -    if (!*st)
> -        return AVERROR(ENOMEM);
> -
> -    (*st)->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> -    (*st)->codecpar->codec_id   = AV_CODEC_ID_XMA2;
> -    (*st)->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
> +    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> +    st->codecpar->codec_id   = AV_CODEC_ID_XMA2;
> +    st->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
>  
>      version = avio_r8(pb);
>      if (version != 3 && version != 4)
> @@ -216,26 +209,26 @@ static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
>      if (size != (32 + ((version==3)?0:8) + 4*num_streams))
>          return AVERROR_INVALIDDATA;
>      avio_skip(pb, 10);
> -    (*st)->codecpar->sample_rate = avio_rb32(pb);
> +    st->codecpar->sample_rate = avio_rb32(pb);
>      if (version == 4)
>          avio_skip(pb, 8);
>      avio_skip(pb, 4);
> -    (*st)->duration = avio_rb32(pb);
> +    st->duration = avio_rb32(pb);
>      avio_skip(pb, 8);
>  
>      for (i = 0; i < num_streams; i++) {
>          channels += avio_r8(pb);
>          avio_skip(pb, 3);
>      }
> -    (*st)->codecpar->channels = channels;
> +    st->codecpar->channels = channels;
>  
> -    if ((*st)->codecpar->channels <= 0 || (*st)->codecpar->sample_rate <= 0)
> +    if (st->codecpar->channels <= 0 || st->codecpar->sample_rate <= 0)
>          return AVERROR_INVALIDDATA;
>  
> -    avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate);
> +    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
>  
>      avio_seek(pb, -size, SEEK_CUR);
> -    if ((ret = ff_get_extradata(s, (*st)->codecpar, pb, size)) < 0)
> +    if ((ret = ff_get_extradata(s, st->codecpar, pb, size)) < 0)
>          return ret;
>  
>      return 0;
> @@ -407,6 +400,11 @@ static int wav_read_header(AVFormatContext *s)
>  
>      }
>  
> +    /* Create the audio stream now so that its index is always zero */
> +    st = avformat_new_stream(s, NULL);
> +    if (!st)
> +        return AVERROR(ENOMEM);
> +
>      for (;;) {
>          AVStream *vst;
>          size         = next_tag(pb, &tag, wav->rifx);
> @@ -418,7 +416,7 @@ static int wav_read_header(AVFormatContext *s)
>          switch (tag) {
>          case MKTAG('f', 'm', 't', ' '):
>              /* only parse the first 'fmt ' tag found */
> -            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, &st)) < 0) {
> +            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, st)) < 0) {
>                  return ret;
>              } else if (got_fmt)
>                  av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n");
> @@ -427,7 +425,7 @@ static int wav_read_header(AVFormatContext *s)
>              break;
>          case MKTAG('X', 'M', 'A', '2'):
>              /* only parse the first 'XMA2' tag found */
> -            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s, size, &st)) < 0) {
> +            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s, size, st)) < 0) {
>                  return ret;
>              } else if (got_xma2)
>                  av_log(s, AV_LOG_WARNING, "found more than one 'XMA2' tag\n");
> @@ -484,6 +482,7 @@ static int wav_read_header(AVFormatContext *s)
>              vst = avformat_new_stream(s, NULL);
>              if (!vst)
>                  return AVERROR(ENOMEM);
> +            wav->vst = vst;
>              avio_r8(pb);
>              vst->id = 1;
>              vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
> @@ -693,23 +692,24 @@ static int wav_read_packet(AVFormatContext *s, AVPacket *pkt)
>  {
>      int ret, size;
>      int64_t left;
> -    AVStream *st;
>      WAVDemuxContext *wav = s->priv_data;
> +    AVStream *st = s->streams[0];
>  
>      if (CONFIG_SPDIF_DEMUXER && wav->spdif == 1)
>          return ff_spdif_read_packet(s, pkt);
>  
>      if (wav->smv_data_ofs > 0) {
>          int64_t audio_dts, video_dts;
> +        AVStream *vst = wav->vst;
>  smv_retry:
> -        audio_dts = (int32_t)s->streams[0]->cur_dts;
> -        video_dts = (int32_t)s->streams[1]->cur_dts;
> +        audio_dts = (int32_t)st->cur_dts;
> +        video_dts = (int32_t)vst->cur_dts;
>  
>          if (audio_dts != AV_NOPTS_VALUE && video_dts != AV_NOPTS_VALUE) {
>              /*We always return a video frame first to get the pixel format first*/
>              wav->smv_last_stream = wav->smv_given_first ?
> -                av_compare_ts(video_dts, s->streams[1]->time_base,
> -                              audio_dts, s->streams[0]->time_base) > 0 : 0;
> +                av_compare_ts(video_dts, vst->time_base,
> +                              audio_dts,  st->time_base) > 0 : 0;
>              wav->smv_given_first = 1;
>          }
>          wav->smv_last_stream = !wav->smv_last_stream;
> @@ -732,7 +732,7 @@ smv_retry:
>              pkt->duration = wav->smv_frames_per_jpeg;
>              wav->smv_block++;
>  
> -            pkt->stream_index = 1;
> +            pkt->stream_index = vst->index;
>  smv_out:
>              avio_seek(s->pb, old_pos, SEEK_SET);
>              if (ret == AVERROR_EOF) {
> @@ -743,8 +743,6 @@ smv_out:
>          }
>      }
>  
> -    st = s->streams[0];
> -
>      left = wav->data_end - avio_tell(s->pb);
>      if (wav->ignore_length)
>          left = INT_MAX;
> @@ -781,22 +779,24 @@ static int wav_read_seek(AVFormatContext *s,
>                           int stream_index, int64_t timestamp, int flags)
>  {
>      WAVDemuxContext *wav = s->priv_data;
> -    AVStream *st;
> +    AVStream *ast = s->streams[0], *vst = wav->vst;
>      wav->smv_eof = 0;
>      wav->audio_eof = 0;
> +
> +    if (stream_index != 0 && (!vst || stream_index != vst->index))
> +        return AVERROR(EINVAL);
>      if (wav->smv_data_ofs > 0) {
>          int64_t smv_timestamp = timestamp;
>          if (stream_index == 0)
> -            smv_timestamp = av_rescale_q(timestamp, s->streams[0]->time_base, s->streams[1]->time_base);
> +            smv_timestamp = av_rescale_q(timestamp, ast->time_base, vst->time_base);
>          else
> -            timestamp = av_rescale_q(smv_timestamp, s->streams[1]->time_base, s->streams[0]->time_base);
> +            timestamp = av_rescale_q(smv_timestamp, vst->time_base, ast->time_base);
>          if (wav->smv_frames_per_jpeg > 0) {
>              wav->smv_block = smv_timestamp / wav->smv_frames_per_jpeg;
>          }
>      }
>  
> -    st = s->streams[0];
> -    switch (st->codecpar->codec_id) {
> +    switch (ast->codecpar->codec_id) {
>      case AV_CODEC_ID_MP2:
>      case AV_CODEC_ID_MP3:
>      case AV_CODEC_ID_AC3:
> @@ -807,7 +807,7 @@ static int wav_read_seek(AVFormatContext *s,
>      default:
>          break;
>      }
> -    return ff_pcm_read_seek(s, stream_index, timestamp, flags);
> +    return ff_pcm_read_seek(s, 0, timestamp, flags);
>  }
>  
>  static const AVClass wav_demuxer_class = {
> 
Will apply unless there are objections.

- Andreas
Paul B Mahol April 18, 2021, 6:49 a.m. UTC | #2
Why you put nonsense regression part in log.

That is very unfriendly behavior from you.

Noted.

On Sun, Apr 18, 2021 at 1:50 AM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Andreas Rheinhardt:
> > Up until now the cover images will get the stream index 0 in this case,
> > violating the hardcoded assumption that this is the index of the audio
> > stream. Fix this by creating the audio stream first; this is also in
> > line with the expectations of ff_pcm_read_seek() and
> > ff_spdif_read_packet(). It also simplifies the code to parse the fmt and
> > xma2 tags.
> >
> > Fixes #8540; regression since f5aad350d3695b5b16e7d135154a4c61e4dce9d8.
> >
> > Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> > ---
> >  libavformat/wavdec.c | 78 ++++++++++++++++++++++----------------------
> >  1 file changed, 39 insertions(+), 39 deletions(-)
> >
> > diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c
> > index 8214ab8498..791ae23b4a 100644
> > --- a/libavformat/wavdec.c
> > +++ b/libavformat/wavdec.c
> > @@ -49,6 +49,7 @@ typedef struct WAVDemuxContext {
> >      const AVClass *class;
> >      int64_t data_end;
> >      int w64;
> > +    AVStream *vst;
> >      int64_t smv_data_ofs;
> >      int smv_block_size;
> >      int smv_frames_per_jpeg;
> > @@ -170,30 +171,26 @@ static void handle_stream_probing(AVStream *st)
> >      }
> >  }
> >
> > -static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream
> **st)
> > +static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream
> *st)
> >  {
> >      AVIOContext *pb = s->pb;
> >      WAVDemuxContext *wav = s->priv_data;
> >      int ret;
> >
> >      /* parse fmt header */
> > -    *st = avformat_new_stream(s, NULL);
> > -    if (!*st)
> > -        return AVERROR(ENOMEM);
> > -
> > -    ret = ff_get_wav_header(s, pb, (*st)->codecpar, size, wav->rifx);
> > +    ret = ff_get_wav_header(s, pb, st->codecpar, size, wav->rifx);
> >      if (ret < 0)
> >          return ret;
> > -    handle_stream_probing(*st);
> > +    handle_stream_probing(st);
> >
> > -    (*st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> > +    st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> >
> > -    avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate);
> > +    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
> >
> >      return 0;
> >  }
> >
> > -static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size,
> AVStream **st)
> > +static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size,
> AVStream *st)
> >  {
> >      AVIOContext *pb = s->pb;
> >      int version, num_streams, i, channels = 0, ret;
> > @@ -201,13 +198,9 @@ static int wav_parse_xma2_tag(AVFormatContext *s,
> int64_t size, AVStream **st)
> >      if (size < 36)
> >          return AVERROR_INVALIDDATA;
> >
> > -    *st = avformat_new_stream(s, NULL);
> > -    if (!*st)
> > -        return AVERROR(ENOMEM);
> > -
> > -    (*st)->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> > -    (*st)->codecpar->codec_id   = AV_CODEC_ID_XMA2;
> > -    (*st)->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
> > +    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> > +    st->codecpar->codec_id   = AV_CODEC_ID_XMA2;
> > +    st->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
> >
> >      version = avio_r8(pb);
> >      if (version != 3 && version != 4)
> > @@ -216,26 +209,26 @@ static int wav_parse_xma2_tag(AVFormatContext *s,
> int64_t size, AVStream **st)
> >      if (size != (32 + ((version==3)?0:8) + 4*num_streams))
> >          return AVERROR_INVALIDDATA;
> >      avio_skip(pb, 10);
> > -    (*st)->codecpar->sample_rate = avio_rb32(pb);
> > +    st->codecpar->sample_rate = avio_rb32(pb);
> >      if (version == 4)
> >          avio_skip(pb, 8);
> >      avio_skip(pb, 4);
> > -    (*st)->duration = avio_rb32(pb);
> > +    st->duration = avio_rb32(pb);
> >      avio_skip(pb, 8);
> >
> >      for (i = 0; i < num_streams; i++) {
> >          channels += avio_r8(pb);
> >          avio_skip(pb, 3);
> >      }
> > -    (*st)->codecpar->channels = channels;
> > +    st->codecpar->channels = channels;
> >
> > -    if ((*st)->codecpar->channels <= 0 || (*st)->codecpar->sample_rate
> <= 0)
> > +    if (st->codecpar->channels <= 0 || st->codecpar->sample_rate <= 0)
> >          return AVERROR_INVALIDDATA;
> >
> > -    avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate);
> > +    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
> >
> >      avio_seek(pb, -size, SEEK_CUR);
> > -    if ((ret = ff_get_extradata(s, (*st)->codecpar, pb, size)) < 0)
> > +    if ((ret = ff_get_extradata(s, st->codecpar, pb, size)) < 0)
> >          return ret;
> >
> >      return 0;
> > @@ -407,6 +400,11 @@ static int wav_read_header(AVFormatContext *s)
> >
> >      }
> >
> > +    /* Create the audio stream now so that its index is always zero */
> > +    st = avformat_new_stream(s, NULL);
> > +    if (!st)
> > +        return AVERROR(ENOMEM);
> > +
> >      for (;;) {
> >          AVStream *vst;
> >          size         = next_tag(pb, &tag, wav->rifx);
> > @@ -418,7 +416,7 @@ static int wav_read_header(AVFormatContext *s)
> >          switch (tag) {
> >          case MKTAG('f', 'm', 't', ' '):
> >              /* only parse the first 'fmt ' tag found */
> > -            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s,
> size, &st)) < 0) {
> > +            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s,
> size, st)) < 0) {
> >                  return ret;
> >              } else if (got_fmt)
> >                  av_log(s, AV_LOG_WARNING, "found more than one 'fmt '
> tag\n");
> > @@ -427,7 +425,7 @@ static int wav_read_header(AVFormatContext *s)
> >              break;
> >          case MKTAG('X', 'M', 'A', '2'):
> >              /* only parse the first 'XMA2' tag found */
> > -            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s,
> size, &st)) < 0) {
> > +            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s,
> size, st)) < 0) {
> >                  return ret;
> >              } else if (got_xma2)
> >                  av_log(s, AV_LOG_WARNING, "found more than one 'XMA2'
> tag\n");
> > @@ -484,6 +482,7 @@ static int wav_read_header(AVFormatContext *s)
> >              vst = avformat_new_stream(s, NULL);
> >              if (!vst)
> >                  return AVERROR(ENOMEM);
> > +            wav->vst = vst;
> >              avio_r8(pb);
> >              vst->id = 1;
> >              vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
> > @@ -693,23 +692,24 @@ static int wav_read_packet(AVFormatContext *s,
> AVPacket *pkt)
> >  {
> >      int ret, size;
> >      int64_t left;
> > -    AVStream *st;
> >      WAVDemuxContext *wav = s->priv_data;
> > +    AVStream *st = s->streams[0];
> >
> >      if (CONFIG_SPDIF_DEMUXER && wav->spdif == 1)
> >          return ff_spdif_read_packet(s, pkt);
> >
> >      if (wav->smv_data_ofs > 0) {
> >          int64_t audio_dts, video_dts;
> > +        AVStream *vst = wav->vst;
> >  smv_retry:
> > -        audio_dts = (int32_t)s->streams[0]->cur_dts;
> > -        video_dts = (int32_t)s->streams[1]->cur_dts;
> > +        audio_dts = (int32_t)st->cur_dts;
> > +        video_dts = (int32_t)vst->cur_dts;
> >
> >          if (audio_dts != AV_NOPTS_VALUE && video_dts != AV_NOPTS_VALUE)
> {
> >              /*We always return a video frame first to get the pixel
> format first*/
> >              wav->smv_last_stream = wav->smv_given_first ?
> > -                av_compare_ts(video_dts, s->streams[1]->time_base,
> > -                              audio_dts, s->streams[0]->time_base) > 0
> : 0;
> > +                av_compare_ts(video_dts, vst->time_base,
> > +                              audio_dts,  st->time_base) > 0 : 0;
> >              wav->smv_given_first = 1;
> >          }
> >          wav->smv_last_stream = !wav->smv_last_stream;
> > @@ -732,7 +732,7 @@ smv_retry:
> >              pkt->duration = wav->smv_frames_per_jpeg;
> >              wav->smv_block++;
> >
> > -            pkt->stream_index = 1;
> > +            pkt->stream_index = vst->index;
> >  smv_out:
> >              avio_seek(s->pb, old_pos, SEEK_SET);
> >              if (ret == AVERROR_EOF) {
> > @@ -743,8 +743,6 @@ smv_out:
> >          }
> >      }
> >
> > -    st = s->streams[0];
> > -
> >      left = wav->data_end - avio_tell(s->pb);
> >      if (wav->ignore_length)
> >          left = INT_MAX;
> > @@ -781,22 +779,24 @@ static int wav_read_seek(AVFormatContext *s,
> >                           int stream_index, int64_t timestamp, int flags)
> >  {
> >      WAVDemuxContext *wav = s->priv_data;
> > -    AVStream *st;
> > +    AVStream *ast = s->streams[0], *vst = wav->vst;
> >      wav->smv_eof = 0;
> >      wav->audio_eof = 0;
> > +
> > +    if (stream_index != 0 && (!vst || stream_index != vst->index))
> > +        return AVERROR(EINVAL);
> >      if (wav->smv_data_ofs > 0) {
> >          int64_t smv_timestamp = timestamp;
> >          if (stream_index == 0)
> > -            smv_timestamp = av_rescale_q(timestamp,
> s->streams[0]->time_base, s->streams[1]->time_base);
> > +            smv_timestamp = av_rescale_q(timestamp, ast->time_base,
> vst->time_base);
> >          else
> > -            timestamp = av_rescale_q(smv_timestamp,
> s->streams[1]->time_base, s->streams[0]->time_base);
> > +            timestamp = av_rescale_q(smv_timestamp, vst->time_base,
> ast->time_base);
> >          if (wav->smv_frames_per_jpeg > 0) {
> >              wav->smv_block = smv_timestamp / wav->smv_frames_per_jpeg;
> >          }
> >      }
> >
> > -    st = s->streams[0];
> > -    switch (st->codecpar->codec_id) {
> > +    switch (ast->codecpar->codec_id) {
> >      case AV_CODEC_ID_MP2:
> >      case AV_CODEC_ID_MP3:
> >      case AV_CODEC_ID_AC3:
> > @@ -807,7 +807,7 @@ static int wav_read_seek(AVFormatContext *s,
> >      default:
> >          break;
> >      }
> > -    return ff_pcm_read_seek(s, stream_index, timestamp, flags);
> > +    return ff_pcm_read_seek(s, 0, timestamp, flags);
> >  }
> >
> >  static const AVClass wav_demuxer_class = {
> >
> Will apply unless there are objections.
>
> - Andreas
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Andreas Rheinhardt April 18, 2021, 11:41 p.m. UTC | #3
Paul B Mahol:
> Why you put nonsense regression part in log.
> 
> That is very unfriendly behavior from you.
> 
> Noted.
> 

Before your patch, the audio in the files in question would work; with
your patch it doesn't any longer. That's a regression.

> On Sun, Apr 18, 2021 at 1:50 AM Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com> wrote:
> 
>> Andreas Rheinhardt:
>>> Up until now the cover images will get the stream index 0 in this case,
>>> violating the hardcoded assumption that this is the index of the audio
>>> stream. Fix this by creating the audio stream first; this is also in
>>> line with the expectations of ff_pcm_read_seek() and
>>> ff_spdif_read_packet(). It also simplifies the code to parse the fmt and
>>> xma2 tags.
>>>
>>> Fixes #8540; regression since f5aad350d3695b5b16e7d135154a4c61e4dce9d8.
>>>
>>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
>>> ---
>>>  libavformat/wavdec.c | 78 ++++++++++++++++++++++----------------------
>>>  1 file changed, 39 insertions(+), 39 deletions(-)
>>>
>>> diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c
>>> index 8214ab8498..791ae23b4a 100644
>>> --- a/libavformat/wavdec.c
>>> +++ b/libavformat/wavdec.c
>>> @@ -49,6 +49,7 @@ typedef struct WAVDemuxContext {
>>>      const AVClass *class;
>>>      int64_t data_end;
>>>      int w64;
>>> +    AVStream *vst;
>>>      int64_t smv_data_ofs;
>>>      int smv_block_size;
>>>      int smv_frames_per_jpeg;
>>> @@ -170,30 +171,26 @@ static void handle_stream_probing(AVStream *st)
>>>      }
>>>  }
>>>
>>> -static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream
>> **st)
>>> +static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream
>> *st)
>>>  {
>>>      AVIOContext *pb = s->pb;
>>>      WAVDemuxContext *wav = s->priv_data;
>>>      int ret;
>>>
>>>      /* parse fmt header */
>>> -    *st = avformat_new_stream(s, NULL);
>>> -    if (!*st)
>>> -        return AVERROR(ENOMEM);
>>> -
>>> -    ret = ff_get_wav_header(s, pb, (*st)->codecpar, size, wav->rifx);
>>> +    ret = ff_get_wav_header(s, pb, st->codecpar, size, wav->rifx);
>>>      if (ret < 0)
>>>          return ret;
>>> -    handle_stream_probing(*st);
>>> +    handle_stream_probing(st);
>>>
>>> -    (*st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>>> +    st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>>>
>>> -    avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate);
>>> +    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
>>>
>>>      return 0;
>>>  }
>>>
>>> -static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size,
>> AVStream **st)
>>> +static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size,
>> AVStream *st)
>>>  {
>>>      AVIOContext *pb = s->pb;
>>>      int version, num_streams, i, channels = 0, ret;
>>> @@ -201,13 +198,9 @@ static int wav_parse_xma2_tag(AVFormatContext *s,
>> int64_t size, AVStream **st)
>>>      if (size < 36)
>>>          return AVERROR_INVALIDDATA;
>>>
>>> -    *st = avformat_new_stream(s, NULL);
>>> -    if (!*st)
>>> -        return AVERROR(ENOMEM);
>>> -
>>> -    (*st)->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
>>> -    (*st)->codecpar->codec_id   = AV_CODEC_ID_XMA2;
>>> -    (*st)->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
>>> +    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
>>> +    st->codecpar->codec_id   = AV_CODEC_ID_XMA2;
>>> +    st->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
>>>
>>>      version = avio_r8(pb);
>>>      if (version != 3 && version != 4)
>>> @@ -216,26 +209,26 @@ static int wav_parse_xma2_tag(AVFormatContext *s,
>> int64_t size, AVStream **st)
>>>      if (size != (32 + ((version==3)?0:8) + 4*num_streams))
>>>          return AVERROR_INVALIDDATA;
>>>      avio_skip(pb, 10);
>>> -    (*st)->codecpar->sample_rate = avio_rb32(pb);
>>> +    st->codecpar->sample_rate = avio_rb32(pb);
>>>      if (version == 4)
>>>          avio_skip(pb, 8);
>>>      avio_skip(pb, 4);
>>> -    (*st)->duration = avio_rb32(pb);
>>> +    st->duration = avio_rb32(pb);
>>>      avio_skip(pb, 8);
>>>
>>>      for (i = 0; i < num_streams; i++) {
>>>          channels += avio_r8(pb);
>>>          avio_skip(pb, 3);
>>>      }
>>> -    (*st)->codecpar->channels = channels;
>>> +    st->codecpar->channels = channels;
>>>
>>> -    if ((*st)->codecpar->channels <= 0 || (*st)->codecpar->sample_rate
>> <= 0)
>>> +    if (st->codecpar->channels <= 0 || st->codecpar->sample_rate <= 0)
>>>          return AVERROR_INVALIDDATA;
>>>
>>> -    avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate);
>>> +    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
>>>
>>>      avio_seek(pb, -size, SEEK_CUR);
>>> -    if ((ret = ff_get_extradata(s, (*st)->codecpar, pb, size)) < 0)
>>> +    if ((ret = ff_get_extradata(s, st->codecpar, pb, size)) < 0)
>>>          return ret;
>>>
>>>      return 0;
>>> @@ -407,6 +400,11 @@ static int wav_read_header(AVFormatContext *s)
>>>
>>>      }
>>>
>>> +    /* Create the audio stream now so that its index is always zero */
>>> +    st = avformat_new_stream(s, NULL);
>>> +    if (!st)
>>> +        return AVERROR(ENOMEM);
>>> +
>>>      for (;;) {
>>>          AVStream *vst;
>>>          size         = next_tag(pb, &tag, wav->rifx);
>>> @@ -418,7 +416,7 @@ static int wav_read_header(AVFormatContext *s)
>>>          switch (tag) {
>>>          case MKTAG('f', 'm', 't', ' '):
>>>              /* only parse the first 'fmt ' tag found */
>>> -            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s,
>> size, &st)) < 0) {
>>> +            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s,
>> size, st)) < 0) {
>>>                  return ret;
>>>              } else if (got_fmt)
>>>                  av_log(s, AV_LOG_WARNING, "found more than one 'fmt '
>> tag\n");
>>> @@ -427,7 +425,7 @@ static int wav_read_header(AVFormatContext *s)
>>>              break;
>>>          case MKTAG('X', 'M', 'A', '2'):
>>>              /* only parse the first 'XMA2' tag found */
>>> -            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s,
>> size, &st)) < 0) {
>>> +            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s,
>> size, st)) < 0) {
>>>                  return ret;
>>>              } else if (got_xma2)
>>>                  av_log(s, AV_LOG_WARNING, "found more than one 'XMA2'
>> tag\n");
>>> @@ -484,6 +482,7 @@ static int wav_read_header(AVFormatContext *s)
>>>              vst = avformat_new_stream(s, NULL);
>>>              if (!vst)
>>>                  return AVERROR(ENOMEM);
>>> +            wav->vst = vst;
>>>              avio_r8(pb);
>>>              vst->id = 1;
>>>              vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
>>> @@ -693,23 +692,24 @@ static int wav_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>>>  {
>>>      int ret, size;
>>>      int64_t left;
>>> -    AVStream *st;
>>>      WAVDemuxContext *wav = s->priv_data;
>>> +    AVStream *st = s->streams[0];
>>>
>>>      if (CONFIG_SPDIF_DEMUXER && wav->spdif == 1)
>>>          return ff_spdif_read_packet(s, pkt);
>>>
>>>      if (wav->smv_data_ofs > 0) {
>>>          int64_t audio_dts, video_dts;
>>> +        AVStream *vst = wav->vst;
>>>  smv_retry:
>>> -        audio_dts = (int32_t)s->streams[0]->cur_dts;
>>> -        video_dts = (int32_t)s->streams[1]->cur_dts;
>>> +        audio_dts = (int32_t)st->cur_dts;
>>> +        video_dts = (int32_t)vst->cur_dts;
>>>
>>>          if (audio_dts != AV_NOPTS_VALUE && video_dts != AV_NOPTS_VALUE)
>> {
>>>              /*We always return a video frame first to get the pixel
>> format first*/
>>>              wav->smv_last_stream = wav->smv_given_first ?
>>> -                av_compare_ts(video_dts, s->streams[1]->time_base,
>>> -                              audio_dts, s->streams[0]->time_base) > 0
>> : 0;
>>> +                av_compare_ts(video_dts, vst->time_base,
>>> +                              audio_dts,  st->time_base) > 0 : 0;
>>>              wav->smv_given_first = 1;
>>>          }
>>>          wav->smv_last_stream = !wav->smv_last_stream;
>>> @@ -732,7 +732,7 @@ smv_retry:
>>>              pkt->duration = wav->smv_frames_per_jpeg;
>>>              wav->smv_block++;
>>>
>>> -            pkt->stream_index = 1;
>>> +            pkt->stream_index = vst->index;
>>>  smv_out:
>>>              avio_seek(s->pb, old_pos, SEEK_SET);
>>>              if (ret == AVERROR_EOF) {
>>> @@ -743,8 +743,6 @@ smv_out:
>>>          }
>>>      }
>>>
>>> -    st = s->streams[0];
>>> -
>>>      left = wav->data_end - avio_tell(s->pb);
>>>      if (wav->ignore_length)
>>>          left = INT_MAX;
>>> @@ -781,22 +779,24 @@ static int wav_read_seek(AVFormatContext *s,
>>>                           int stream_index, int64_t timestamp, int flags)
>>>  {
>>>      WAVDemuxContext *wav = s->priv_data;
>>> -    AVStream *st;
>>> +    AVStream *ast = s->streams[0], *vst = wav->vst;
>>>      wav->smv_eof = 0;
>>>      wav->audio_eof = 0;
>>> +
>>> +    if (stream_index != 0 && (!vst || stream_index != vst->index))
>>> +        return AVERROR(EINVAL);
>>>      if (wav->smv_data_ofs > 0) {
>>>          int64_t smv_timestamp = timestamp;
>>>          if (stream_index == 0)
>>> -            smv_timestamp = av_rescale_q(timestamp,
>> s->streams[0]->time_base, s->streams[1]->time_base);
>>> +            smv_timestamp = av_rescale_q(timestamp, ast->time_base,
>> vst->time_base);
>>>          else
>>> -            timestamp = av_rescale_q(smv_timestamp,
>> s->streams[1]->time_base, s->streams[0]->time_base);
>>> +            timestamp = av_rescale_q(smv_timestamp, vst->time_base,
>> ast->time_base);
>>>          if (wav->smv_frames_per_jpeg > 0) {
>>>              wav->smv_block = smv_timestamp / wav->smv_frames_per_jpeg;
>>>          }
>>>      }
>>>
>>> -    st = s->streams[0];
>>> -    switch (st->codecpar->codec_id) {
>>> +    switch (ast->codecpar->codec_id) {
>>>      case AV_CODEC_ID_MP2:
>>>      case AV_CODEC_ID_MP3:
>>>      case AV_CODEC_ID_AC3:
>>> @@ -807,7 +807,7 @@ static int wav_read_seek(AVFormatContext *s,
>>>      default:
>>>          break;
>>>      }
>>> -    return ff_pcm_read_seek(s, stream_index, timestamp, flags);
>>> +    return ff_pcm_read_seek(s, 0, timestamp, flags);
>>>  }
>>>
>>>  static const AVClass wav_demuxer_class = {
>>>
>> Will apply unless there are objections.
>>
>> - Andreas
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c
index 8214ab8498..791ae23b4a 100644
--- a/libavformat/wavdec.c
+++ b/libavformat/wavdec.c
@@ -49,6 +49,7 @@  typedef struct WAVDemuxContext {
     const AVClass *class;
     int64_t data_end;
     int w64;
+    AVStream *vst;
     int64_t smv_data_ofs;
     int smv_block_size;
     int smv_frames_per_jpeg;
@@ -170,30 +171,26 @@  static void handle_stream_probing(AVStream *st)
     }
 }
 
-static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
+static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream *st)
 {
     AVIOContext *pb = s->pb;
     WAVDemuxContext *wav = s->priv_data;
     int ret;
 
     /* parse fmt header */
-    *st = avformat_new_stream(s, NULL);
-    if (!*st)
-        return AVERROR(ENOMEM);
-
-    ret = ff_get_wav_header(s, pb, (*st)->codecpar, size, wav->rifx);
+    ret = ff_get_wav_header(s, pb, st->codecpar, size, wav->rifx);
     if (ret < 0)
         return ret;
-    handle_stream_probing(*st);
+    handle_stream_probing(st);
 
-    (*st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
+    st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
 
-    avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate);
+    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
 
     return 0;
 }
 
-static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
+static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream *st)
 {
     AVIOContext *pb = s->pb;
     int version, num_streams, i, channels = 0, ret;
@@ -201,13 +198,9 @@  static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
     if (size < 36)
         return AVERROR_INVALIDDATA;
 
-    *st = avformat_new_stream(s, NULL);
-    if (!*st)
-        return AVERROR(ENOMEM);
-
-    (*st)->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
-    (*st)->codecpar->codec_id   = AV_CODEC_ID_XMA2;
-    (*st)->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
+    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->codec_id   = AV_CODEC_ID_XMA2;
+    st->need_parsing         = AVSTREAM_PARSE_FULL_RAW;
 
     version = avio_r8(pb);
     if (version != 3 && version != 4)
@@ -216,26 +209,26 @@  static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
     if (size != (32 + ((version==3)?0:8) + 4*num_streams))
         return AVERROR_INVALIDDATA;
     avio_skip(pb, 10);
-    (*st)->codecpar->sample_rate = avio_rb32(pb);
+    st->codecpar->sample_rate = avio_rb32(pb);
     if (version == 4)
         avio_skip(pb, 8);
     avio_skip(pb, 4);
-    (*st)->duration = avio_rb32(pb);
+    st->duration = avio_rb32(pb);
     avio_skip(pb, 8);
 
     for (i = 0; i < num_streams; i++) {
         channels += avio_r8(pb);
         avio_skip(pb, 3);
     }
-    (*st)->codecpar->channels = channels;
+    st->codecpar->channels = channels;
 
-    if ((*st)->codecpar->channels <= 0 || (*st)->codecpar->sample_rate <= 0)
+    if (st->codecpar->channels <= 0 || st->codecpar->sample_rate <= 0)
         return AVERROR_INVALIDDATA;
 
-    avpriv_set_pts_info(*st, 64, 1, (*st)->codecpar->sample_rate);
+    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
 
     avio_seek(pb, -size, SEEK_CUR);
-    if ((ret = ff_get_extradata(s, (*st)->codecpar, pb, size)) < 0)
+    if ((ret = ff_get_extradata(s, st->codecpar, pb, size)) < 0)
         return ret;
 
     return 0;
@@ -407,6 +400,11 @@  static int wav_read_header(AVFormatContext *s)
 
     }
 
+    /* Create the audio stream now so that its index is always zero */
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
     for (;;) {
         AVStream *vst;
         size         = next_tag(pb, &tag, wav->rifx);
@@ -418,7 +416,7 @@  static int wav_read_header(AVFormatContext *s)
         switch (tag) {
         case MKTAG('f', 'm', 't', ' '):
             /* only parse the first 'fmt ' tag found */
-            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, &st)) < 0) {
+            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, st)) < 0) {
                 return ret;
             } else if (got_fmt)
                 av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n");
@@ -427,7 +425,7 @@  static int wav_read_header(AVFormatContext *s)
             break;
         case MKTAG('X', 'M', 'A', '2'):
             /* only parse the first 'XMA2' tag found */
-            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s, size, &st)) < 0) {
+            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s, size, st)) < 0) {
                 return ret;
             } else if (got_xma2)
                 av_log(s, AV_LOG_WARNING, "found more than one 'XMA2' tag\n");
@@ -484,6 +482,7 @@  static int wav_read_header(AVFormatContext *s)
             vst = avformat_new_stream(s, NULL);
             if (!vst)
                 return AVERROR(ENOMEM);
+            wav->vst = vst;
             avio_r8(pb);
             vst->id = 1;
             vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
@@ -693,23 +692,24 @@  static int wav_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     int ret, size;
     int64_t left;
-    AVStream *st;
     WAVDemuxContext *wav = s->priv_data;
+    AVStream *st = s->streams[0];
 
     if (CONFIG_SPDIF_DEMUXER && wav->spdif == 1)
         return ff_spdif_read_packet(s, pkt);
 
     if (wav->smv_data_ofs > 0) {
         int64_t audio_dts, video_dts;
+        AVStream *vst = wav->vst;
 smv_retry:
-        audio_dts = (int32_t)s->streams[0]->cur_dts;
-        video_dts = (int32_t)s->streams[1]->cur_dts;
+        audio_dts = (int32_t)st->cur_dts;
+        video_dts = (int32_t)vst->cur_dts;
 
         if (audio_dts != AV_NOPTS_VALUE && video_dts != AV_NOPTS_VALUE) {
             /*We always return a video frame first to get the pixel format first*/
             wav->smv_last_stream = wav->smv_given_first ?
-                av_compare_ts(video_dts, s->streams[1]->time_base,
-                              audio_dts, s->streams[0]->time_base) > 0 : 0;
+                av_compare_ts(video_dts, vst->time_base,
+                              audio_dts,  st->time_base) > 0 : 0;
             wav->smv_given_first = 1;
         }
         wav->smv_last_stream = !wav->smv_last_stream;
@@ -732,7 +732,7 @@  smv_retry:
             pkt->duration = wav->smv_frames_per_jpeg;
             wav->smv_block++;
 
-            pkt->stream_index = 1;
+            pkt->stream_index = vst->index;
 smv_out:
             avio_seek(s->pb, old_pos, SEEK_SET);
             if (ret == AVERROR_EOF) {
@@ -743,8 +743,6 @@  smv_out:
         }
     }
 
-    st = s->streams[0];
-
     left = wav->data_end - avio_tell(s->pb);
     if (wav->ignore_length)
         left = INT_MAX;
@@ -781,22 +779,24 @@  static int wav_read_seek(AVFormatContext *s,
                          int stream_index, int64_t timestamp, int flags)
 {
     WAVDemuxContext *wav = s->priv_data;
-    AVStream *st;
+    AVStream *ast = s->streams[0], *vst = wav->vst;
     wav->smv_eof = 0;
     wav->audio_eof = 0;
+
+    if (stream_index != 0 && (!vst || stream_index != vst->index))
+        return AVERROR(EINVAL);
     if (wav->smv_data_ofs > 0) {
         int64_t smv_timestamp = timestamp;
         if (stream_index == 0)
-            smv_timestamp = av_rescale_q(timestamp, s->streams[0]->time_base, s->streams[1]->time_base);
+            smv_timestamp = av_rescale_q(timestamp, ast->time_base, vst->time_base);
         else
-            timestamp = av_rescale_q(smv_timestamp, s->streams[1]->time_base, s->streams[0]->time_base);
+            timestamp = av_rescale_q(smv_timestamp, vst->time_base, ast->time_base);
         if (wav->smv_frames_per_jpeg > 0) {
             wav->smv_block = smv_timestamp / wav->smv_frames_per_jpeg;
         }
     }
 
-    st = s->streams[0];
-    switch (st->codecpar->codec_id) {
+    switch (ast->codecpar->codec_id) {
     case AV_CODEC_ID_MP2:
     case AV_CODEC_ID_MP3:
     case AV_CODEC_ID_AC3:
@@ -807,7 +807,7 @@  static int wav_read_seek(AVFormatContext *s,
     default:
         break;
     }
-    return ff_pcm_read_seek(s, stream_index, timestamp, flags);
+    return ff_pcm_read_seek(s, 0, timestamp, flags);
 }
 
 static const AVClass wav_demuxer_class = {