[FFmpeg-devel,23/37] avformat/matroskadec: Redo level handling

Submitted by Andreas Rheinhardt on May 16, 2019, 10:30 p.m.

Details

Message ID 20190516223018.30827-24-andreas.rheinhardt@gmail.com
State New
Headers show

Commit Message

Andreas Rheinhardt May 16, 2019, 10:30 p.m.
This commit changes how levels are handled: If the level used for
ebml_parse ends directly after an element that has been consumed, then
ebml_parse ends the level itself (and any known-length levels that end
there as well) and informs the caller via the return value; if the
current level is of unknown-length, then the level is ended as soon as
an element that is not valid on the current level, but on a higher
level is encountered (or if EOF has been encountered).

This is designed for situations where one wants to parse master elements
incrementally, i.e. not in one go via ebml_parse_nest.

The (incremental) parsing of clusters still mixes levels by using a
syntax list that contains elements from different levels and the level
is still ended manually via a call to ebml_level_end if the last cluster
was an unknown-length cluster (known-length clusters are already ended
when their last element is read), but only if the next element is a
cluster, too. A  different level 1 element following an unknown-length
cluster will currently simply be presumed to be part of the earlier
cluster. Fixing this will be done in a future patch. The modifications
to matroska_parse_cluster contained in this patch are only intended not
to cause regressions.

Nevertheless, the fact that known-length levels are automatically ended
in ebml_parse when their last element has been read already fixes a bogus
error message introduced in 9326117b that was emitted when a known-length
cluster is followed by another level 1 element other than a cluster in
which case the cluster's level was not ended (which only happened when
a new cluster has been encountered) so that the length check (introduced
in 9326117b) failed for the level 1 element as it is of course not
contained in the previous cluster. Most Matroska files were affected by
this.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
---
 libavformat/matroskadec.c | 103 ++++++++++++++++++++++++++++++--------
 1 file changed, 82 insertions(+), 21 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 31c70c84d4..97e25c4c99 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -71,6 +71,8 @@ 
 #define EBML_UNKNOWN_LENGTH  UINT64_MAX /* EBML unknown length, in uint64_t */
 #define NEEDS_CHECKING                2 /* Indicates that some error checks
                                          * still need to be performed */
+#define LEVEL_ENDED                   3 /* return value of ebml_parse when the
+                                         * syntax level used for parsing ended. */
 
 typedef enum {
     EBML_NONE,
@@ -1083,7 +1085,7 @@  static EbmlSyntax *ebml_parse_id(EbmlSyntax *syntax, uint32_t id)
 static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                            void *data)
 {
-    int i, res = 0;
+    int i, res;
 
     for (i = 0; syntax[i].id; i++)
         switch (syntax[i].type) {
@@ -1108,10 +1110,16 @@  static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
             break;
         }
 
-    while (!res && !ebml_level_end(matroska))
+    if (!matroska->levels[matroska->num_levels - 1].length) {
+        matroska->num_levels--;
+        return 0;
+    }
+
+    do {
         res = ebml_parse(matroska, syntax, data);
+    } while (!res);
 
-    return res;
+    return res == LEVEL_ENDED ? 0 : res;
 }
 
 static int is_ebml_id_valid(uint32_t id)
@@ -1180,17 +1188,26 @@  static int ebml_parse(MatroskaDemuxContext *matroska,
     uint32_t id;
     uint64_t length;
     int64_t pos = avio_tell(pb);
-    int res, update_pos = 1;
+    int res, update_pos = 1, level_check;
     void *newelem;
     MatroskaLevel1Element *level1_elem;
+    MatroskaLevel *level = matroska->num_levels ? &matroska->levels[matroska->num_levels - 1] : NULL;
 
     if (!matroska->current_id) {
         uint64_t id;
         res = ebml_read_num(matroska, pb, 4, &id, 0);
         if (res < 0) {
-            // in live mode, finish parsing if EOF is reached.
-            return (matroska->is_live && pb->eof_reached &&
-                    res == AVERROR_EOF) ? 1 : res;
+            if (pb->eof_reached && res == AVERROR_EOF) {
+                if (matroska->is_live)
+                    // in live mode, finish parsing if EOF is reached.
+                    return 1;
+                if (level && level->length == EBML_UNKNOWN_LENGTH && pos == avio_tell(pb)) {
+                    // Unknown-length levels automatically end at EOF.
+                    matroska->num_levels--;
+                    return LEVEL_ENDED;
+                }
+            }
+            return res;
         }
         matroska->current_id = id | 1 << 7 * res;
     } else
@@ -1199,13 +1216,22 @@  static int ebml_parse(MatroskaDemuxContext *matroska,
     id = matroska->current_id;
 
     syntax = ebml_parse_id(syntax, id);
-    if (!syntax->id && id == MATROSKA_ID_CLUSTER &&
-        matroska->num_levels > 0                 &&
-        matroska->levels[matroska->num_levels - 1].length == EBML_UNKNOWN_LENGTH)
-        return 0;  // we reached the end of an unknown size cluster
     if (!syntax->id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
-        av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
-        update_pos = 0;
+        if (level && level->length == EBML_UNKNOWN_LENGTH) {
+            // Unknown-length levels end when an element from an upper level
+            // in the hierarchy is encountered.
+            while (syntax->def.n) {
+                syntax = ebml_parse_id(syntax->def.n, id);
+                if (syntax->id) {
+                    matroska->num_levels--;
+                    return LEVEL_ENDED;
+                }
+            };
+        }
+
+        av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32" at pos. "
+                                            "%"PRId64"\n", id, pos);
+        update_pos = 0; /* Don't update resync_pos as an error might have happened. */
     }
 
     data = (char *) data + syntax->data_offset;
@@ -1240,26 +1266,34 @@  static int ebml_parse(MatroskaDemuxContext *matroska,
                 uint64_t elem_end = pos + length,
                         level_end = level->start + level->length;
 
-                if (level_end < elem_end) {
+                if (elem_end < level_end) {
+                    level_check = 0;
+                } else if (elem_end == level_end) {
+                    level_check = LEVEL_ENDED;
+                } else {
                     av_log(matroska->ctx, AV_LOG_ERROR,
                            "Element at 0x%"PRIx64" ending at 0x%"PRIx64" exceeds "
                            "containing master element ending at 0x%"PRIx64"\n",
                            pos, elem_end, level_end);
                     return AVERROR_INVALIDDATA;
                 }
+            } else if (length != EBML_UNKNOWN_LENGTH) {
+                level_check = 0;
             } else if (level->length != EBML_UNKNOWN_LENGTH) {
                 av_log(matroska->ctx, AV_LOG_ERROR, "Unknown-sized element "
                        "at 0x%"PRIx64" inside parent with finite size\n", pos);
                 return AVERROR_INVALIDDATA;
-            } else if (length == EBML_UNKNOWN_LENGTH && id != MATROSKA_ID_CLUSTER) {
+            } else if (id != MATROSKA_ID_CLUSTER) {
                 // According to the specifications only clusters and segments
                 // are allowed to be unknown-sized.
                 av_log(matroska->ctx, AV_LOG_ERROR,
                        "Found unknown-sized element other than a cluster at "
                        "0x%"PRIx64". Dropping the invalid element.\n", pos);
                 return AVERROR_INVALIDDATA;
-            }
-        }
+            } else
+                level_check = 0;
+        } else
+            level_check = 0;
 
         if (update_pos) {
             // We have found an element that is allowed at this place
@@ -1300,7 +1334,9 @@  static int ebml_parse(MatroskaDemuxContext *matroska,
                 av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n");
             level1_elem->parsed = 1;
         }
-        return ebml_parse_nest(matroska, syntax->def.n, data);
+        if (res = ebml_parse_nest(matroska, syntax->def.n, data))
+            return res;
+        break;
     case EBML_STOP:
         return 1;
     default:
@@ -1330,7 +1366,7 @@  static int ebml_parse(MatroskaDemuxContext *matroska,
                 else
                     res = AVERROR_EOF;
             } else
-                res = 0;
+                goto level_check;
         }
 
         if (res == AVERROR_INVALIDDATA)
@@ -1339,8 +1375,24 @@  static int ebml_parse(MatroskaDemuxContext *matroska,
             av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
         else if (res == AVERROR_EOF)
             av_log(matroska->ctx, AV_LOG_ERROR, "File ended prematurely\n");
+
+        return res;
     }
-    return res;
+
+level_check:
+    if (level_check == LEVEL_ENDED && matroska->num_levels) {
+        level = &matroska->levels[matroska->num_levels - 1];
+        pos   = avio_tell(pb);
+
+        // Given that pos >= level->start no check for
+        // level->length != EBML_UNKNOWN_LENGTH is necessary.
+        while (matroska->num_levels && pos == level->start + level->length) {
+            matroska->num_levels--;
+            level--;
+        }
+    }
+
+    return level_check;
 }
 
 static void ebml_free(EbmlSyntax *syntax, void *data)
@@ -1699,6 +1751,10 @@  static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
             matroska->current_id                   = 0;
 
             ret = ebml_parse(matroska, matroska_segment, matroska);
+            if (ret == LEVEL_ENDED) {
+                /* This can only happen if the seek brought us beyond EOF. */
+                ret = AVERROR_EOF;
+            }
         }
     }
     /* Seek back - notice that in all instances where this is used it is safe
@@ -3559,9 +3615,11 @@  static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
             res = ebml_parse(matroska,
                              matroska_cluster_parsing,
                              cluster);
+        else
+            cluster->pos = 0;
     }
 
-    if (!res && block->bin.size > 0) {
+    if (res >= 0 && block->bin.size > 0) {
             int is_keyframe = block->non_simple ? block->reference == INT64_MIN : -1;
             uint8_t* additional = block->additional.size > 0 ?
                                     block->additional.data : NULL;
@@ -3576,6 +3634,9 @@  static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
                                        block->discard_padding);
     }
 
+    if (res == LEVEL_ENDED)
+        cluster->pos = 0;
+
     ebml_free(matroska_blockgroup, block);
     memset(block, 0, sizeof(*block));