diff mbox series

[FFmpeg-devel] avformat/wavdec: Fix reading files with id3v2 apic before fmt tag

Message ID VI1PR0301MB2159572CD211995F4CBDD8BE8F4C9@VI1PR0301MB2159.eurprd03.prod.outlook.com
State Superseded
Headers show
Series [FFmpeg-devel] avformat/wavdec: Fix reading files with id3v2 apic before fmt tag
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Andreas Rheinhardt April 16, 2021, 7:18 p.m. UTC
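If an ID3v2 tag containing APIC (attached picture) frames precedes the
fmt tag, the cover images will get the stream index 0, violating the
hardcoded assumption that this is the index of the audio stream.
Store pointers to the audio and video streams in the demuxer context
and use them instead of the fixed stream indices 0 and 1.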

Fixes #8540; regression since f5aad350d3695b5b16e7d135154a4c61e4dce9d8.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavformat/wavdec.c | 51 +++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c
index 8214ab8498..6e584bbbb3 100644
--- a/libavformat/wavdec.c
+++ b/libavformat/wavdec.c
@@ -49,6 +49,7 @@  typedef struct WAVDemuxContext {
     const AVClass *class;
     int64_t data_end;
     int w64;
+    AVStream *ast, *vst;
     int64_t smv_data_ofs;
     int smv_block_size;
     int smv_frames_per_jpeg;
@@ -77,7 +78,7 @@  static const AVOption demux_options[] = {
 
 static void set_spdif(AVFormatContext *s, WAVDemuxContext *wav)
 {
-    if (CONFIG_SPDIF_DEMUXER && s->streams[0]->codecpar->codec_tag == 1) {
+    if (CONFIG_SPDIF_DEMUXER && wav->ast->codecpar->codec_tag == 1) {
         enum AVCodecID codec;
         int len = 1<<16;
         int ret = ffio_ensure_seekback(s->pb, len);
@@ -92,7 +93,7 @@  static void set_spdif(AVFormatContext *s, WAVDemuxContext *wav)
                 if (len >= 0) {
                     ret = ff_spdif_probe(buf, len, &codec);
                     if (ret > AVPROBE_SCORE_EXTENSION) {
-                        s->streams[0]->codecpar->codec_id = codec;
+                        wav->ast->codecpar->codec_id = codec;
                         wav->spdif = 1;
                     }
                 }
@@ -180,6 +181,7 @@  static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
     *st = avformat_new_stream(s, NULL);
     if (!*st)
         return AVERROR(ENOMEM);
+    wav->ast = *st;
 
     ret = ff_get_wav_header(s, pb, (*st)->codecpar, size, wav->rifx);
     if (ret < 0)
@@ -196,6 +198,7 @@  static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
 static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
 {
     AVIOContext *pb = s->pb;
+    WAVDemuxContext *wav = s->priv_data;
     int version, num_streams, i, channels = 0, ret;
 
     if (size < 36)
@@ -204,6 +207,7 @@  static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
     *st = avformat_new_stream(s, NULL);
     if (!*st)
         return AVERROR(ENOMEM);
+    wav->ast = *st;
 
     (*st)->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
     (*st)->codecpar->codec_id   = AV_CODEC_ID_XMA2;
@@ -484,6 +488,7 @@  static int wav_read_header(AVFormatContext *s)
             vst = avformat_new_stream(s, NULL);
             if (!vst)
                 return AVERROR(ENOMEM);
+            wav->vst = vst;
             avio_r8(pb);
             vst->id = 1;
             vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
@@ -693,23 +698,29 @@  static int wav_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     int ret, size;
     int64_t left;
-    AVStream *st;
     WAVDemuxContext *wav = s->priv_data;
+    AVStream *st = wav->ast;
 
-    if (CONFIG_SPDIF_DEMUXER && wav->spdif == 1)
-        return ff_spdif_read_packet(s, pkt);
+    if (CONFIG_SPDIF_DEMUXER && wav->spdif == 1) {
+        ret = ff_spdif_read_packet(s, pkt);
+        if (ret < 0)
+            return ret;
+        pkt->stream_index = st->index;
+        return 0;
+    }
 
     if (wav->smv_data_ofs > 0) {
         int64_t audio_dts, video_dts;
+        AVStream *vst = wav->vst;
 smv_retry:
-        audio_dts = (int32_t)s->streams[0]->cur_dts;
-        video_dts = (int32_t)s->streams[1]->cur_dts;
+        audio_dts = (int32_t)st->cur_dts;
+        video_dts = (int32_t)vst->cur_dts;
 
         if (audio_dts != AV_NOPTS_VALUE && video_dts != AV_NOPTS_VALUE) {
             /*We always return a video frame first to get the pixel format first*/
             wav->smv_last_stream = wav->smv_given_first ?
-                av_compare_ts(video_dts, s->streams[1]->time_base,
-                              audio_dts, s->streams[0]->time_base) > 0 : 0;
+                av_compare_ts(video_dts, vst->time_base,
+                              audio_dts,  st->time_base) > 0 : 0;
             wav->smv_given_first = 1;
         }
         wav->smv_last_stream = !wav->smv_last_stream;
@@ -732,7 +743,7 @@  smv_retry:
             pkt->duration = wav->smv_frames_per_jpeg;
             wav->smv_block++;
 
-            pkt->stream_index = 1;
+            pkt->stream_index = vst->index;
 smv_out:
             avio_seek(s->pb, old_pos, SEEK_SET);
             if (ret == AVERROR_EOF) {
@@ -743,8 +754,6 @@  smv_out:
         }
     }
 
-    st = s->streams[0];
-
     left = wav->data_end - avio_tell(s->pb);
     if (wav->ignore_length)
         left = INT_MAX;
@@ -772,7 +781,7 @@  smv_out:
     ret  = av_get_packet(s->pb, pkt, size);
     if (ret < 0)
         return ret;
-    pkt->stream_index = 0;
+    pkt->stream_index = st->index;
 
     return ret;
 }
@@ -781,22 +790,25 @@  static int wav_read_seek(AVFormatContext *s,
                          int stream_index, int64_t timestamp, int flags)
 {
     WAVDemuxContext *wav = s->priv_data;
-    AVStream *st;
+    AVStream *ast = wav->ast, *vst = wav->vst;
     wav->smv_eof = 0;
     wav->audio_eof = 0;
+
+    if (stream_index != ast->index &&
+        (!vst || stream_index != vst->index))
+        return AVERROR(EINVAL);
     if (wav->smv_data_ofs > 0) {
         int64_t smv_timestamp = timestamp;
-        if (stream_index == 0)
-            smv_timestamp = av_rescale_q(timestamp, s->streams[0]->time_base, s->streams[1]->time_base);
+        if (stream_index == ast->index)
+            smv_timestamp = av_rescale_q(timestamp, ast->time_base, vst->time_base);
         else
-            timestamp = av_rescale_q(smv_timestamp, s->streams[1]->time_base, s->streams[0]->time_base);
+            timestamp = av_rescale_q(smv_timestamp, vst->time_base, ast->time_base);
         if (wav->smv_frames_per_jpeg > 0) {
             wav->smv_block = smv_timestamp / wav->smv_frames_per_jpeg;
         }
     }
 
-    st = s->streams[0];
-    switch (st->codecpar->codec_id) {
+    switch (ast->codecpar->codec_id) {
     case AV_CODEC_ID_MP2:
     case AV_CODEC_ID_MP3:
     case AV_CODEC_ID_AC3:
@@ -870,6 +882,7 @@  static int w64_read_header(AVFormatContext *s)
     st = avformat_new_stream(s, NULL);
     if (!st)
         return AVERROR(ENOMEM);
+    wav->ast = st;
 
     while (!avio_feof(pb)) {
         if (avio_read(pb, guid, 16) != 16)