diff mbox

[FFmpeg-devel] lavf/matroskadec: fix is_keyframe for early Blocks

Message ID 20170131010549.2403-1-chcunningham@chromium.org
State Superseded
Headers show

Commit Message

chcunningham@chromium.org Jan. 31, 2017, 1:05 a.m. UTC
Blocks are marked as key frames whenever the "reference" field is
zero. This is incorrect for non-keyframe Blocks that take a refernce
on a keyframe at time zero.

Now using -1 to denote "no reference".

Reported to chromium at http://crbug.com/497889 (contains sample)
---
 libavformat/matroskadec.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

wm4 Jan. 31, 2017, 8:57 a.m. UTC | #1
On Mon, 30 Jan 2017 17:05:49 -0800
Chris Cunningham <chcunningham@chromium.org> wrote:

> Blocks are marked as key frames whenever the "reference" field is
> zero. This is incorrect for non-keyframe Blocks that take a refernce
> on a keyframe at time zero.
> 
> Now using -1 to denote "no reference".
> 
> Reported to chromium at http://crbug.com/497889 (contains sample)
> ---
>  libavformat/matroskadec.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index e6737a70b2..0d033b574c 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -89,6 +89,7 @@ typedef const struct EbmlSyntax {
>      int list_elem_size;
>      int data_offset;
>      union {
> +        int64_t     i;
>          uint64_t    u;
>          double      f;
>          const char *s;
> @@ -696,7 +697,7 @@ static const EbmlSyntax matroska_blockgroup[] = {
>      { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock, bin) },
>      { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock, duration) },
>      { MATROSKA_ID_DISCARDPADDING, EBML_SINT, 0, offsetof(MatroskaBlock, discard_padding) },
> -    { MATROSKA_ID_BLOCKREFERENCE, EBML_SINT, 0, offsetof(MatroskaBlock, reference) },
> +    { MATROSKA_ID_BLOCKREFERENCE, EBML_SINT, 0, offsetof(MatroskaBlock, reference), { .i = -1 } },
>      { MATROSKA_ID_CODECSTATE,     EBML_NONE },
>      {                          1, EBML_UINT, 0, offsetof(MatroskaBlock, non_simple), { .u = 1 } },
>      { 0 }
> @@ -1071,6 +1072,8 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
>  
>      for (i = 0; syntax[i].id; i++)
>          switch (syntax[i].type) {
> +        case EBML_SINT:
> +            *(int64_t *) ((char *) data + syntax[i].data_offset) = syntax[i].def.i;
>          case EBML_UINT:

Isn't there a break missing?

>              *(uint64_t *) ((char *) data + syntax[i].data_offset) = syntax[i].def.u;
>              break;
> @@ -3361,7 +3364,7 @@ static int matroska_parse_cluster_incremental(MatroskaDemuxContext *matroska)
>          matroska->current_cluster_num_blocks = blocks_list->nb_elem;
>          i                                    = blocks_list->nb_elem - 1;
>          if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
> -            int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
> +            int is_keyframe = blocks[i].non_simple ? blocks[i].reference == -1 : -1;
>              uint8_t* additional = blocks[i].additional.size > 0 ?
>                                      blocks[i].additional.data : NULL;
>              if (!blocks[i].non_simple)
> @@ -3399,7 +3402,7 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
>      blocks      = blocks_list->elem;
>      for (i = 0; i < blocks_list->nb_elem; i++)
>          if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
> -            int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
> +            int is_keyframe = blocks[i].non_simple ? blocks[i].reference == -1 : -1;
>              res = matroska_parse_block(matroska, blocks[i].bin.data,
>                                         blocks[i].bin.size, blocks[i].bin.pos,
>                                         cluster.timecode, blocks[i].duration,

I don't quite trust this. The file has negative block references too
(what do they even mean?). E.g. one block uses "-123". This doesn't
make much sense to me, and at the very least it means -1 is not a safe
dummy value (because negative values don't mean non-keyframe according
to your patch, while -1 as exception does).

The oldest/most used (until recently at least) mkv demuxer, Haali
actually does every block reference element as a non-keyframe:

http://git.1f0.de/gitweb?p=ffmpeg.git;a=blob;f=libavformat/MatroskaParser.c;h=173c2e1c20da59d4cf0b501639c470331cd4515f;hb=HEAD#l2354

This seems much safer.

Do you have any insight why the file contains such erratic seeming
reference values? I'm sure I'm missing something. Or is it a broken
muxer/broken file?
wm4 Jan. 31, 2017, 9:07 a.m. UTC | #2
On Tue, 31 Jan 2017 09:57:24 +0100
wm4 <nfxjfg@googlemail.com> wrote:

> On Mon, 30 Jan 2017 17:05:49 -0800
> Chris Cunningham <chcunningham@chromium.org> wrote:
> 
> > Blocks are marked as key frames whenever the "reference" field is
> > zero. This is incorrect for non-keyframe Blocks that take a refernce
> > on a keyframe at time zero.
> > 
> > Now using -1 to denote "no reference".
> > 
> > Reported to chromium at http://crbug.com/497889 (contains sample)
> > ---
> >  libavformat/matroskadec.c | 9 ++++++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> > 
> > diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> > index e6737a70b2..0d033b574c 100644
> > --- a/libavformat/matroskadec.c
> > +++ b/libavformat/matroskadec.c
> > @@ -89,6 +89,7 @@ typedef const struct EbmlSyntax {
> >      int list_elem_size;
> >      int data_offset;
> >      union {
> > +        int64_t     i;
> >          uint64_t    u;
> >          double      f;
> >          const char *s;
> > @@ -696,7 +697,7 @@ static const EbmlSyntax matroska_blockgroup[] = {
> >      { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock, bin) },
> >      { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock, duration) },
> >      { MATROSKA_ID_DISCARDPADDING, EBML_SINT, 0, offsetof(MatroskaBlock, discard_padding) },
> > -    { MATROSKA_ID_BLOCKREFERENCE, EBML_SINT, 0, offsetof(MatroskaBlock, reference) },
> > +    { MATROSKA_ID_BLOCKREFERENCE, EBML_SINT, 0, offsetof(MatroskaBlock, reference), { .i = -1 } },
> >      { MATROSKA_ID_CODECSTATE,     EBML_NONE },
> >      {                          1, EBML_UINT, 0, offsetof(MatroskaBlock, non_simple), { .u = 1 } },
> >      { 0 }
> > @@ -1071,6 +1072,8 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
> >  
> >      for (i = 0; syntax[i].id; i++)
> >          switch (syntax[i].type) {
> > +        case EBML_SINT:
> > +            *(int64_t *) ((char *) data + syntax[i].data_offset) = syntax[i].def.i;
> >          case EBML_UINT:  
> 
> Isn't there a break missing?
> 
> >              *(uint64_t *) ((char *) data + syntax[i].data_offset) = syntax[i].def.u;
> >              break;
> > @@ -3361,7 +3364,7 @@ static int matroska_parse_cluster_incremental(MatroskaDemuxContext *matroska)
> >          matroska->current_cluster_num_blocks = blocks_list->nb_elem;
> >          i                                    = blocks_list->nb_elem - 1;
> >          if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
> > -            int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
> > +            int is_keyframe = blocks[i].non_simple ? blocks[i].reference == -1 : -1;
> >              uint8_t* additional = blocks[i].additional.size > 0 ?
> >                                      blocks[i].additional.data : NULL;
> >              if (!blocks[i].non_simple)
> > @@ -3399,7 +3402,7 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
> >      blocks      = blocks_list->elem;
> >      for (i = 0; i < blocks_list->nb_elem; i++)
> >          if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
> > -            int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
> > +            int is_keyframe = blocks[i].non_simple ? blocks[i].reference == -1 : -1;
> >              res = matroska_parse_block(matroska, blocks[i].bin.data,
> >                                         blocks[i].bin.size, blocks[i].bin.pos,
> >                                         cluster.timecode, blocks[i].duration,  
> 
> I don't quite trust this. The file has negative block references too
> (what do they even mean?). E.g. one block uses "-123". This doesn't
> make much sense to me, and at the very least it means -1 is not a safe
> dummy value (because negative values don't mean non-keyframe according
> to your patch, while -1 as exception does).
> 
> The oldest/most used (until recently at least) mkv demuxer, Haali
> actually does every block reference element as a non-keyframe:
> 
> http://git.1f0.de/gitweb?p=ffmpeg.git;a=blob;f=libavformat/MatroskaParser.c;h=173c2e1c20da59d4cf0b501639c470331cd4515f;hb=HEAD#l2354
> 
> This seems much safer.
> 
> Do you have any insight why the file contains such erratic seeming
> reference values? I'm sure I'm missing something. Or is it a broken
> muxer/broken file?

Oh, nevermind. The values in the reference elements are
supposed to be _relative_ timestamps. This means -1 is still not a safe
dummy value. But then, what is a value of "0" supposed to mean?

Going after Haali seems the safest fix, as it most likely won't break
anything.
diff mbox

Patch

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index e6737a70b2..0d033b574c 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -89,6 +89,7 @@  typedef const struct EbmlSyntax {
     int list_elem_size;
     int data_offset;
     union {
+        int64_t     i;
         uint64_t    u;
         double      f;
         const char *s;
@@ -696,7 +697,7 @@  static const EbmlSyntax matroska_blockgroup[] = {
     { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock, bin) },
     { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock, duration) },
     { MATROSKA_ID_DISCARDPADDING, EBML_SINT, 0, offsetof(MatroskaBlock, discard_padding) },
-    { MATROSKA_ID_BLOCKREFERENCE, EBML_SINT, 0, offsetof(MatroskaBlock, reference) },
+    { MATROSKA_ID_BLOCKREFERENCE, EBML_SINT, 0, offsetof(MatroskaBlock, reference), { .i = -1 } },
     { MATROSKA_ID_CODECSTATE,     EBML_NONE },
     {                          1, EBML_UINT, 0, offsetof(MatroskaBlock, non_simple), { .u = 1 } },
     { 0 }
@@ -1071,6 +1072,8 @@  static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
 
     for (i = 0; syntax[i].id; i++)
         switch (syntax[i].type) {
+        case EBML_SINT:
+            *(int64_t *) ((char *) data + syntax[i].data_offset) = syntax[i].def.i;
         case EBML_UINT:
             *(uint64_t *) ((char *) data + syntax[i].data_offset) = syntax[i].def.u;
             break;
@@ -3361,7 +3364,7 @@  static int matroska_parse_cluster_incremental(MatroskaDemuxContext *matroska)
         matroska->current_cluster_num_blocks = blocks_list->nb_elem;
         i                                    = blocks_list->nb_elem - 1;
         if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
-            int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
+            int is_keyframe = blocks[i].non_simple ? blocks[i].reference == -1 : -1;
             uint8_t* additional = blocks[i].additional.size > 0 ?
                                     blocks[i].additional.data : NULL;
             if (!blocks[i].non_simple)
@@ -3399,7 +3402,7 @@  static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
     blocks      = blocks_list->elem;
     for (i = 0; i < blocks_list->nb_elem; i++)
         if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
-            int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
+            int is_keyframe = blocks[i].non_simple ? blocks[i].reference == -1 : -1;
             res = matroska_parse_block(matroska, blocks[i].bin.data,
                                        blocks[i].bin.size, blocks[i].bin.pos,
                                        cluster.timecode, blocks[i].duration,