diff mbox

[FFmpeg-devel,14/21] avformat/matroskadec: Use proper levels after discontínuity

Message ID 20190327111852.3784-15-andreas.rheinhardt@googlemail.com
State New
Headers show

Commit Message

Diego Felix de Souza via ffmpeg-devel March 27, 2019, 11:18 a.m. UTC
The earlier code set the level to zero upon seeking and after a
discontinuity although in both cases parsing (re)starts at a level 1
element.

Also set the segment's length to unkown if an error occured in order not
to drop any valid data that happens to be beyond the designated end of
the segment.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@googlemail.com>
---
 libavformat/matroskadec.c | 59 +++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 24 deletions(-)

Comments

Steve Lhomme April 7, 2019, 7:30 a.m. UTC | #1
On 3/27/2019 12:18 PM, Andreas Rheinhardt via ffmpeg-devel wrote:
> The earlier code set the level to zero upon seeking and after a
> discontinuity although in both cases parsing (re)starts at a level 1
> element.
>
> Also set the segment's length to unkown if an error occured in order not
> to drop any valid data that happens to be beyond the designated end of
> the segment.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@googlemail.com>
> ---
>   libavformat/matroskadec.c | 59 +++++++++++++++++++++++----------------
>   1 file changed, 35 insertions(+), 24 deletions(-)
>
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 0179e5426e..42f1c21042 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -737,13 +737,24 @@ static const char *const matroska_doctypes[] = { "matroska", "webm" };
>   
>   static int matroska_read_close(AVFormatContext *s);
>   
> +static int matroska_reset_status(MatroskaDemuxContext *matroska,
> +                                 uint32_t id, int64_t position)
> +{
> +    matroska->current_id = id;
> +    matroska->num_levels = 1;
> +    matroska->current_cluster.pos = 0;
> +
> +    if (position >= 0)
> +        return avio_seek(matroska->ctx->pb, position, SEEK_SET);
> +
> +    return 0;
> +}
> +

I think you should have done this commit in 2 parts.
- adding matroska_reset_status() and do exactly what was done before
- add changes related to the level and unknown length/discontinuity.

>   static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
>   {
>       AVIOContext *pb = matroska->ctx->pb;
>       int64_t ret;
>       uint32_t id;
> -    matroska->current_id = 0;
> -    matroska->num_levels = 0;
>   
>       /* seek to next position to resync from */
>       if ((ret = avio_seek(pb, last_pos + 1, SEEK_SET)) < 0) {
> @@ -759,7 +770,14 @@ static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
>               id == MATROSKA_ID_CUES     || id == MATROSKA_ID_TAGS        ||
>               id == MATROSKA_ID_SEEKHEAD || id == MATROSKA_ID_ATTACHMENTS ||
>               id == MATROSKA_ID_CLUSTER  || id == MATROSKA_ID_CHAPTERS) {
> -            matroska->current_id = id;
> +            /* Prepare the context for further parsing of a level 1 element. */
> +            matroska_reset_status(matroska, id, -1);

You set num_levels to 1 now, leaving this function used to have 
num_levels set to 0. I'm not sure it's correct or not.

> +
> +            /* Given that we are here means that an error has occured,

I thought this function was meant to find a level1 ID after getting into 
a Segment. This does not seem like an error at all.

> +             * so treat the segment as unknown length in order not to
> +             * discard valid data that happens to be beyond the designated
> +             * end of the segment. */
> +            matroska->levels[0].length = EBML_UNKNOWN_LENGTH;
>               return 0;
>           }
>           id = (id << 8) | avio_r8(pb);
> @@ -1610,18 +1628,12 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
>               matroska->current_id                   = 0;
>   
>               ret = ebml_parse(matroska, matroska_segment, matroska);
> -
> -            /* remove dummy level */
> -            while (matroska->num_levels) {
> -                uint64_t length = matroska->levels[--matroska->num_levels].length;
> -                if (length == EBML_UNKNOWN_LENGTH)
> -                    break;
> -            }

I think this code indicates unknown length was handled in a seekhead 
entry, probably because such files exist. Making the assumption in 13/21 
about unknown length only on Segment+Cluster incorrect.

>           }
>       }
> -    /* seek back */
> -    avio_seek(matroska->ctx->pb, before_pos, SEEK_SET);
> -    matroska->current_id = saved_id;
> +
> +    /* Seek back - notice that in all instances where this is used it is safe
> +     * to set the level to 1 and unset the position of the current cluster. */
> +    matroska_reset_status(matroska, saved_id, before_pos);
>   
>       return ret;
>   }
> @@ -3535,9 +3547,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
>       timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);
>   
>       if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0 || index == st->nb_index_entries - 1) {
> -        avio_seek(s->pb, st->index_entries[st->nb_index_entries - 1].pos,
> -                  SEEK_SET);
> -        matroska->current_id = 0;
> +        matroska_reset_status(matroska, 0, st->index_entries[st->nb_index_entries - 1].pos);
>           while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0 || index == st->nb_index_entries - 1) {
>               matroska_clear_queue(matroska);
>               if (matroska_parse_cluster(matroska) < 0)
> @@ -3557,8 +3567,8 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
>           tracks[i].end_timecode         = 0;
>       }
>   
> -    avio_seek(s->pb, st->index_entries[index].pos, SEEK_SET);
> -    matroska->current_id       = 0;
> +    /* We seek to a level 1 element, so set the appropriate status. */
> +    matroska_reset_status(matroska, 0, st->index_entries[index].pos);
>       if (flags & AVSEEK_FLAG_ANY) {
>           st->skip_to_keyframe = 0;
>           matroska->skip_to_timecode = timestamp;
> @@ -3568,18 +3578,16 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
>       }
>       matroska->skip_to_keyframe = 1;
>       matroska->done             = 0;
> -    matroska->num_levels       = 0;
>       ff_update_cur_dts(s, st, st->index_entries[index].timestamp);
>       return 0;
>   err:
>       // slightly hackish but allows proper fallback to
>       // the generic seeking code.
> +    matroska_reset_status(matroska, 0, -1);
>       matroska_clear_queue(matroska);
> -    matroska->current_id = 0;
>       st->skip_to_keyframe =
>       matroska->skip_to_keyframe = 0;
>       matroska->done = 0;
> -    matroska->num_levels = 0;
>       return -1;
>   }
>   
> @@ -3662,8 +3670,8 @@ static int webm_clusters_start_with_keyframe(AVFormatContext *s)
>           read = ebml_read_length(matroska, matroska->ctx->pb, &cluster_length);
>           if (read < 0)
>               break;
> -        avio_seek(s->pb, cluster_pos, SEEK_SET);
> -        matroska->current_id = 0;
> +
> +        matroska_reset_status(matroska, 0, cluster_pos);
>           matroska_clear_queue(matroska);
>           if (matroska_parse_cluster(matroska) < 0 ||
>               !matroska->queue) {
> @@ -3677,7 +3685,10 @@ static int webm_clusters_start_with_keyframe(AVFormatContext *s)
>               break;
>           }
>       }
> -    avio_seek(s->pb, before_pos, SEEK_SET);
> +
> +    /* Restore the status after matroska_read_header: */
> +    matroska_reset_status(matroska, MATROSKA_ID_CLUSTER, before_pos);
> +
>       return rv;
>   }
>   
> -- 
> 2.19.2
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Diego Felix de Souza via ffmpeg-devel April 7, 2019, 12:59 p.m. UTC | #2
Steve Lhomme:
> On 3/27/2019 12:18 PM, Andreas Rheinhardt via ffmpeg-devel wrote:
>> The earlier code set the level to zero upon seeking and after a
>> discontinuity although in both cases parsing (re)starts at a level 1
>> element.
>>
>> Also set the segment's length to unkown if an error occured in order
>> not
>> to drop any valid data that happens to be beyond the designated end of
>> the segment.
>>
>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@googlemail.com>
>> ---
>>   libavformat/matroskadec.c | 59
>> +++++++++++++++++++++++----------------
>>   1 file changed, 35 insertions(+), 24 deletions(-)
>>
>> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
>> index 0179e5426e..42f1c21042 100644
>> --- a/libavformat/matroskadec.c
>> +++ b/libavformat/matroskadec.c
>> @@ -737,13 +737,24 @@ static const char *const matroska_doctypes[] =
>> { "matroska", "webm" };
>>     static int matroska_read_close(AVFormatContext *s);
>>   +static int matroska_reset_status(MatroskaDemuxContext *matroska,
>> +                                 uint32_t id, int64_t position)
>> +{
>> +    matroska->current_id = id;
>> +    matroska->num_levels = 1;
>> +    matroska->current_cluster.pos = 0;
>> +
>> +    if (position >= 0)
>> +        return avio_seek(matroska->ctx->pb, position, SEEK_SET);
>> +
>> +    return 0;
>> +}
>> +
> 
> I think you should have done this commit in 2 parts.
> - adding matroska_reset_status() and do exactly what was done before
> - add changes related to the level and unknown length/discontinuity.
> 
Ok, that's easily possible.

>>   static int matroska_resync(MatroskaDemuxContext *matroska, int64_t
>> last_pos)
>>   {
>>       AVIOContext *pb = matroska->ctx->pb;
>>       int64_t ret;
>>       uint32_t id;
>> -    matroska->current_id = 0;
>> -    matroska->num_levels = 0;
>>         /* seek to next position to resync from */
>>       if ((ret = avio_seek(pb, last_pos + 1, SEEK_SET)) < 0) {
>> @@ -759,7 +770,14 @@ static int matroska_resync(MatroskaDemuxContext
>> *matroska, int64_t last_pos)
>>               id == MATROSKA_ID_CUES     || id ==
>> MATROSKA_ID_TAGS        ||
>>               id == MATROSKA_ID_SEEKHEAD || id ==
>> MATROSKA_ID_ATTACHMENTS ||
>>               id == MATROSKA_ID_CLUSTER  || id ==
>> MATROSKA_ID_CHAPTERS) {
>> -            matroska->current_id = id;
>> +            /* Prepare the context for further parsing of a level 1
>> element. */
>> +            matroska_reset_status(matroska, id, -1);
> 
> You set num_levels to 1 now, leaving this function used to have
> num_levels set to 0. I'm not sure it's correct or not.
> 
We have found a level 1 element, so the level should be set to 1. If
we enter and parse the level 1 element found, we will be at level two;
currently we are only at level 1 (so that the level 1 element appears
to be the highest level in the hierarchy, when in fact it is not). So
this change is intentional.
>> +
>> +            /* Given that we are here means that an error has occured,
> 
> I thought this function was meant to find a level1 ID after getting
> into a Segment. This does not seem like an error at all.
> 
First the EBML header is parsed as a typical EBML_NEST element; then
the segment is parsed as nest, too. The syntax according to which its
children are parsed contains all level 1 elements; except for a
cluster (which is an EBML_STOP element) all elements are EBML_LEVEL1
and are therefore parsed according to the respective syntax elements
for their descendants. For normal files, there is no need to resync at
all: The next element begins directly after the previous one has ended.

matroska_resync is only used in two situations:
If parsing the segment doesn't find a cluster and we are not in the
mode for parsing live header files. (This implies that an error
(possibly EOF) has occurred.)
And if an error happened during reading a packet (i.e. in
matroska_parse_cluster()).
There is actually no reason to resync for valid files at all. (When
reading till the end, current FFmpeg would use matroska_resync() at
the end even if everything actually looks fine. Patch 20 is designed
to fix this.)
>> +             * so treat the segment as unknown length in order not to
>> +             * discard valid data that happens to be beyond the
>> designated
>> +             * end of the segment. */
>> +            matroska->levels[0].length = EBML_UNKNOWN_LENGTH;
>>               return 0;
>>           }
>>           id = (id << 8) | avio_r8(pb);
>> @@ -1610,18 +1628,12 @@ static int
>> matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
>>               matroska->current_id                   = 0;
>>                 ret = ebml_parse(matroska, matroska_segment, matroska);
>> -
>> -            /* remove dummy level */
>> -            while (matroska->num_levels) {
>> -                uint64_t length =
>> matroska->levels[--matroska->num_levels].length;
>> -                if (length == EBML_UNKNOWN_LENGTH)
>> -                    break;
>> -            }
> 
> I think this code indicates unknown length was handled in a seekhead
> entry, probably because such files exist. Making the assumption in
> 13/21 about unknown length only on Segment+Cluster incorrect.
> 
1. The current way of handling unknown-sized elements is this:

a) If the current level is unknown-sized and we encounter a cluster ID
when clusters are not part of the syntax list used currently for
parsing, parsing is stopped and matroska->current_id (containing a
cluster ID) is kept. The code here contains a comment that we reached
the end of an unknown-size cluster, although it needn't be true that
the current master element is indeed a cluster.
(Michael has already merged my patch regarding length checks (that you
partially objected to) so that currently only level 0 elements and
clusters can be unknown-sized, but before that it was not assured that
we really reached the end of an unknown-size cluster.)
b) ebml_level_end() ends the current level if there is one and if
matroska->current_id is set. (It of course also ends the current level
based upon a length criterion, but that is only useful for known-sized
master elements.)

2. As you can see (both from the code and the comment), the code was
based around the assumption that only clusters (and segments) could be
unkown-sized.

3. The code snippet you cite as evidence that unknown length was
somehow supported for seekheads actually doesn't work if the
referenced element is an unknown-length element: In this case, it's
not the dummy level that is removed, but rather only the uppermost
unknown-size level. (And of course, parsing the actual element the
seekhead points to still likely fails, because the code doesn't know
when the levels end.)
>>           }
>>       }
>> -    /* seek back */
>> -    avio_seek(matroska->ctx->pb, before_pos, SEEK_SET);
>> -    matroska->current_id = saved_id;
>> +
>> +    /* Seek back - notice that in all instances where this is used
>> it is safe
>> +     * to set the level to 1 and unset the position of the current
>> cluster. */
>> +    matroska_reset_status(matroska, saved_id, before_pos);
>>         return ret;
>>   }
>> @@ -3535,9 +3547,7 @@ static int matroska_read_seek(AVFormatContext
>> *s, int stream_index,
>>       timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);
>>         if ((index = av_index_search_timestamp(st, timestamp,
>> flags)) < 0 || index == st->nb_index_entries - 1) {
>> -        avio_seek(s->pb, st->index_entries[st->nb_index_entries -
>> 1].pos,
>> -                  SEEK_SET);
>> -        matroska->current_id = 0;
>> +        matroska_reset_status(matroska, 0,
>> st->index_entries[st->nb_index_entries - 1].pos);
>>           while ((index = av_index_search_timestamp(st, timestamp,
>> flags)) < 0 || index == st->nb_index_entries - 1) {
>>               matroska_clear_queue(matroska);
>>               if (matroska_parse_cluster(matroska) < 0)
>> @@ -3557,8 +3567,8 @@ static int matroska_read_seek(AVFormatContext
>> *s, int stream_index,
>>           tracks[i].end_timecode         = 0;
>>       }
>>   -    avio_seek(s->pb, st->index_entries[index].pos, SEEK_SET);
>> -    matroska->current_id       = 0;
>> +    /* We seek to a level 1 element, so set the appropriate status. */
>> +    matroska_reset_status(matroska, 0, st->index_entries[index].pos);
>>       if (flags & AVSEEK_FLAG_ANY) {
>>           st->skip_to_keyframe = 0;
>>           matroska->skip_to_timecode = timestamp;
>> @@ -3568,18 +3578,16 @@ static int
>> matroska_read_seek(AVFormatContext *s, int stream_index,
>>       }
>>       matroska->skip_to_keyframe = 1;
>>       matroska->done             = 0;
>> -    matroska->num_levels       = 0;
>>       ff_update_cur_dts(s, st, st->index_entries[index].timestamp);
>>       return 0;
>>   err:
>>       // slightly hackish but allows proper fallback to
>>       // the generic seeking code.
>> +    matroska_reset_status(matroska, 0, -1);
>>       matroska_clear_queue(matroska);
>> -    matroska->current_id = 0;
>>       st->skip_to_keyframe =
>>       matroska->skip_to_keyframe = 0;
>>       matroska->done = 0;
>> -    matroska->num_levels = 0;
>>       return -1;
>>   }
>>   @@ -3662,8 +3670,8 @@ static int
>> webm_clusters_start_with_keyframe(AVFormatContext *s)
>>           read = ebml_read_length(matroska, matroska->ctx->pb,
>> &cluster_length);
>>           if (read < 0)
>>               break;
>> -        avio_seek(s->pb, cluster_pos, SEEK_SET);
>> -        matroska->current_id = 0;
>> +
>> +        matroska_reset_status(matroska, 0, cluster_pos);
>>           matroska_clear_queue(matroska);
>>           if (matroska_parse_cluster(matroska) < 0 ||
>>               !matroska->queue) {
>> @@ -3677,7 +3685,10 @@ static int
>> webm_clusters_start_with_keyframe(AVFormatContext *s)
>>               break;
>>           }
>>       }
>> -    avio_seek(s->pb, before_pos, SEEK_SET);
>> +
>> +    /* Restore the status after matroska_read_header: */
>> +    matroska_reset_status(matroska, MATROSKA_ID_CLUSTER, before_pos);
>> +
>>       return rv;
>>   }
>>   -- 
>> 2.19.2
diff mbox

Patch

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 0179e5426e..42f1c21042 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -737,13 +737,24 @@  static const char *const matroska_doctypes[] = { "matroska", "webm" };
 
 static int matroska_read_close(AVFormatContext *s);
 
+static int matroska_reset_status(MatroskaDemuxContext *matroska,
+                                 uint32_t id, int64_t position)
+{
+    matroska->current_id = id;
+    matroska->num_levels = 1;
+    matroska->current_cluster.pos = 0;
+
+    if (position >= 0)
+        return avio_seek(matroska->ctx->pb, position, SEEK_SET);
+
+    return 0;
+}
+
 static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
 {
     AVIOContext *pb = matroska->ctx->pb;
     int64_t ret;
     uint32_t id;
-    matroska->current_id = 0;
-    matroska->num_levels = 0;
 
     /* seek to next position to resync from */
     if ((ret = avio_seek(pb, last_pos + 1, SEEK_SET)) < 0) {
@@ -759,7 +770,14 @@  static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
             id == MATROSKA_ID_CUES     || id == MATROSKA_ID_TAGS        ||
             id == MATROSKA_ID_SEEKHEAD || id == MATROSKA_ID_ATTACHMENTS ||
             id == MATROSKA_ID_CLUSTER  || id == MATROSKA_ID_CHAPTERS) {
-            matroska->current_id = id;
+            /* Prepare the context for further parsing of a level 1 element. */
+            matroska_reset_status(matroska, id, -1);
+
+            /* Given that we are here means that an error has occured,
+             * so treat the segment as unknown length in order not to
+             * discard valid data that happens to be beyond the designated
+             * end of the segment. */
+            matroska->levels[0].length = EBML_UNKNOWN_LENGTH;
             return 0;
         }
         id = (id << 8) | avio_r8(pb);
@@ -1610,18 +1628,12 @@  static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
             matroska->current_id                   = 0;
 
             ret = ebml_parse(matroska, matroska_segment, matroska);
-
-            /* remove dummy level */
-            while (matroska->num_levels) {
-                uint64_t length = matroska->levels[--matroska->num_levels].length;
-                if (length == EBML_UNKNOWN_LENGTH)
-                    break;
-            }
         }
     }
-    /* seek back */
-    avio_seek(matroska->ctx->pb, before_pos, SEEK_SET);
-    matroska->current_id = saved_id;
+
+    /* Seek back - notice that in all instances where this is used it is safe
+     * to set the level to 1 and unset the position of the current cluster. */
+    matroska_reset_status(matroska, saved_id, before_pos);
 
     return ret;
 }
@@ -3535,9 +3547,7 @@  static int matroska_read_seek(AVFormatContext *s, int stream_index,
     timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);
 
     if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0 || index == st->nb_index_entries - 1) {
-        avio_seek(s->pb, st->index_entries[st->nb_index_entries - 1].pos,
-                  SEEK_SET);
-        matroska->current_id = 0;
+        matroska_reset_status(matroska, 0, st->index_entries[st->nb_index_entries - 1].pos);
         while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0 || index == st->nb_index_entries - 1) {
             matroska_clear_queue(matroska);
             if (matroska_parse_cluster(matroska) < 0)
@@ -3557,8 +3567,8 @@  static int matroska_read_seek(AVFormatContext *s, int stream_index,
         tracks[i].end_timecode         = 0;
     }
 
-    avio_seek(s->pb, st->index_entries[index].pos, SEEK_SET);
-    matroska->current_id       = 0;
+    /* We seek to a level 1 element, so set the appropriate status. */
+    matroska_reset_status(matroska, 0, st->index_entries[index].pos);
     if (flags & AVSEEK_FLAG_ANY) {
         st->skip_to_keyframe = 0;
         matroska->skip_to_timecode = timestamp;
@@ -3568,18 +3578,16 @@  static int matroska_read_seek(AVFormatContext *s, int stream_index,
     }
     matroska->skip_to_keyframe = 1;
     matroska->done             = 0;
-    matroska->num_levels       = 0;
     ff_update_cur_dts(s, st, st->index_entries[index].timestamp);
     return 0;
 err:
     // slightly hackish but allows proper fallback to
     // the generic seeking code.
+    matroska_reset_status(matroska, 0, -1);
     matroska_clear_queue(matroska);
-    matroska->current_id = 0;
     st->skip_to_keyframe =
     matroska->skip_to_keyframe = 0;
     matroska->done = 0;
-    matroska->num_levels = 0;
     return -1;
 }
 
@@ -3662,8 +3670,8 @@  static int webm_clusters_start_with_keyframe(AVFormatContext *s)
         read = ebml_read_length(matroska, matroska->ctx->pb, &cluster_length);
         if (read < 0)
             break;
-        avio_seek(s->pb, cluster_pos, SEEK_SET);
-        matroska->current_id = 0;
+
+        matroska_reset_status(matroska, 0, cluster_pos);
         matroska_clear_queue(matroska);
         if (matroska_parse_cluster(matroska) < 0 ||
             !matroska->queue) {
@@ -3677,7 +3685,10 @@  static int webm_clusters_start_with_keyframe(AVFormatContext *s)
             break;
         }
     }
-    avio_seek(s->pb, before_pos, SEEK_SET);
+
+    /* Restore the status after matroska_read_header: */
+    matroska_reset_status(matroska, MATROSKA_ID_CLUSTER, before_pos);
+
     return rv;
 }