diff mbox

[FFmpeg-devel,2/2] avformat/matroskaenc: Regression fix for invalid MKV headers

Message ID MWHPR13MB1678A076C0012A0FB5CFAEE2BA600@MWHPR13MB1678.namprd13.prod.outlook.com
State Superseded
Headers show

Commit Message

Soft Works Jan. 5, 2017, 6:07 p.m. UTC
The following three commits created a regression by writing initially
invalid mkv headers:

650e17d88b63b5aca6e0a43483e89e64b0f7d2dd avformat/matroskaenc: write a
CRC32 element on Tags
3bcadf822711720ff0f8d14db71ae47cdf97e652 avformat/matroskaenc: write a
CRC32 element on Info
ee888cfbe777cd2916a3548c750e433ab8f8e6a5 avformat/matroskaenc: postpone
writing the Tracks master

Symptoms:

- You can no longer playback a file that is still processed by ffmpeg,
e.g. VLC fails playback
- You can no longer stream a file to a client while if is still being
processed
- Various diagnosing tools show header errors or incomplete headers
(e.g. ffprobe, mediainfo, mkvalidator)

Note: The symptoms do not apply to completed files or ffmpeg runs that
were interrupted with 'q'

Cause:

The mentioned commits made changes in a way that some header elements
are only partially written in
mkv_write_header, leaving the header in an invalid state. Only in
mkv_write_trailer, these elements
are finished correctly, but that does only occur at the end of the
process.

Regression:

Before these commits were applied, mkv headers have always been valid,
even before completion of ffmpeg.
This has worked reliably over many versions of ffmpeg, to it was an
obvious regression.

Bugtracker:

This issue has been recorded as #5977 which is resolved by this patch

Patch:

The patch adds a new function 'end_ebml_master_crc32_preliminary' that
preliminarily finishes the ebl
element without destroying the buffer. The buffer can be used to update
the ebml element later during
mkv_write_trailer. But most important: mkv_write_header finishes with a
valid mkv header again.
---
 libavformat/matroskaenc.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

Comments

James Almer Jan. 6, 2017, 5:02 a.m. UTC | #1
On 1/5/2017 3:07 PM, Soft Works wrote:
> The following three commits created a regression by writing initially
> invalid mkv headers:
> 
> 650e17d88b63b5aca6e0a43483e89e64b0f7d2dd avformat/matroskaenc: write a
> CRC32 element on Tags
> 3bcadf822711720ff0f8d14db71ae47cdf97e652 avformat/matroskaenc: write a
> CRC32 element on Info
> ee888cfbe777cd2916a3548c750e433ab8f8e6a5 avformat/matroskaenc: postpone
> writing the Tracks master
> 
> Symptoms:
> 
> - You can no longer playback a file that is still processed by ffmpeg,
> e.g. VLC fails playback
> - You can no longer stream a file to a client while if is still being
> processed
> - Various diagnosing tools show header errors or incomplete headers
> (e.g. ffprobe, mediainfo, mkvalidator)
> 
> Note: The symptoms do not apply to completed files or ffmpeg runs that
> were interrupted with 'q'
> 
> Cause:
> 
> The mentioned commits made changes in a way that some header elements
> are only partially written in
> mkv_write_header, leaving the header in an invalid state. Only in
> mkv_write_trailer, these elements
> are finished correctly, but that does only occur at the end of the
> process.
> 
> Regression:
> 
> Before these commits were applied, mkv headers have always been valid,
> even before completion of ffmpeg.
> This has worked reliably over many versions of ffmpeg, to it was an
> obvious regression.
> 
> Bugtracker:
> 
> This issue has been recorded as #5977 which is resolved by this patch
> 
> Patch:
> 
> The patch adds a new function 'end_ebml_master_crc32_preliminary' that
> preliminarily finishes the ebl
> element without destroying the buffer. The buffer can be used to update
> the ebml element later during
> mkv_write_trailer. But most important: mkv_write_header finishes with a
> valid mkv header again.
> ---
>  libavformat/matroskaenc.c | 28 +++++++++++++++++++++++++---
>  1 file changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
> index 827d755..27d83a6 100644
> --- a/libavformat/matroskaenc.c
> +++ b/libavformat/matroskaenc.c
> @@ -367,6 +367,28 @@ static void end_ebml_master_crc32(AVIOContext *pb, AVIOContext **dyn_cp, Matrosk
>      *dyn_cp = NULL;
>  }
>  
> +/**
> +* Complete ebml master whithout destroying the buffer, allowing for later updates
> +*/
> +static void end_ebml_master_crc32_preliminary(AVIOContext *pb, AVIOContext **dyn_cp, MatroskaMuxContext *mkv,
> +    ebml_master master)
> +{
> +    uint8_t *buf, crc[4];
> +    int size, skip = 0;
> +
> +    if (pb->seekable) {
> +
> +        size = avio_get_dyn_buf(*dyn_cp, &buf);
> +        if (mkv->write_crc && mkv->mode != MODE_WEBM) {
> +            skip = 6; /* Skip reserved 6-byte long void element from the dynamic buffer. */
> +            AV_WL32(crc, av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), UINT32_MAX, buf + skip, size - skip) ^ UINT32_MAX);
> +            put_ebml_binary(pb, EBML_ID_CRC32, crc, sizeof(crc));
> +        }

IMO, no point calculating and writing a CRC element for this temporary state.
You can rename and simplify this function into something like

static void end_ebml_master_preliminary(AVIOContext *pb, AVIOContext **dyn_cp,
                                        ebml_master master)
{
    uint8_t *buf;
    int size = avio_get_dyn_buf(*dyn_cp, &buf);

    avio_write(pb, buf, size);
    end_ebml_master(pb, master);
}

> +        avio_write(pb, buf + skip, size - skip);
> +        end_ebml_master(pb, master);
> +    }
> +}
> +
>  static void put_xiph_size(AVIOContext *pb, int size)
>  {
>      ffio_fill(pb, 255, size / 255);
> @@ -1309,7 +1331,7 @@ static int mkv_write_tracks(AVFormatContext *s)
>      }
>  
>      if (pb->seekable && !mkv->is_live)
> -        put_ebml_void(pb, avio_tell(mkv->tracks_bc));
> +        end_ebml_master_crc32_preliminary(pb, &mkv->tracks_bc, mkv, mkv->tracks_master);
>      else
>          end_ebml_master_crc32(pb, &mkv->tracks_bc, mkv, mkv->tracks_master);
>  
> @@ -1554,7 +1576,7 @@ static int mkv_write_tags(AVFormatContext *s)
>  
>      if (mkv->tags.pos) {
>          if (s->pb->seekable && !mkv->is_live)
> -            put_ebml_void(s->pb, avio_tell(mkv->tags_bc));
> +            end_ebml_master_crc32_preliminary(s->pb, &mkv->tags_bc, mkv, mkv->tags);
>          else
>              end_ebml_master_crc32(s->pb, &mkv->tags_bc, mkv, mkv->tags);
>      }
> @@ -1811,7 +1833,7 @@ static int mkv_write_header(AVFormatContext *s)
>          }
>      }
>      if (s->pb->seekable && !mkv->is_live)
> -        put_ebml_void(s->pb, avio_tell(pb));
> +        end_ebml_master_crc32_preliminary(s->pb, &mkv->info_bc, mkv, mkv->info);
>      else
>          end_ebml_master_crc32(s->pb, &mkv->info_bc, mkv, mkv->info);
>      pb = s->pb;

FATE passes so LGTM.

I'll leave the avio changes review to someone else.
Soft Works Jan. 6, 2017, 5:41 p.m. UTC | #2
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> on behalf of James Almer <jamrial@gmail.com>
> Sent: Friday, January 6, 2017 6:02 AM
> 
> IMO, no point calculating and writing a CRC element for this temporary state.
> You can rename and simplify this function into something like
> 
> static void end_ebml_master_preliminary(AVIOContext *pb, AVIOContext **dyn_cp,
>                                         ebml_master master)
> {
>     uint8_t *buf;
>     int size = avio_get_dyn_buf(*dyn_cp, &buf);
> 
>     avio_write(pb, buf, size);
>     end_ebml_master(pb, master);
> }

James,

thanks for looking into this!

I wasn't sure if clients would be OK when some headers have CRC and some have not 
(in the preliminary state). Also I'm not sure if clients are OK with the CRC bytes being 
zero. 
But if you're sure that all this is fine, I'll make this change...

softworkz
James Almer Jan. 6, 2017, 6 p.m. UTC | #3
On 1/6/2017 2:41 PM, Soft Works wrote:
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> on behalf of James Almer <jamrial@gmail.com>
>> Sent: Friday, January 6, 2017 6:02 AM
>>
>> IMO, no point calculating and writing a CRC element for this temporary state.
>> You can rename and simplify this function into something like
>>
>> static void end_ebml_master_preliminary(AVIOContext *pb, AVIOContext **dyn_cp,
>>                                         ebml_master master)
>> {
>>     uint8_t *buf;
>>     int size = avio_get_dyn_buf(*dyn_cp, &buf);
>>
>>     avio_write(pb, buf, size);
>>     end_ebml_master(pb, master);
>> }
> 
> James,
> 
> thanks for looking into this!
> 
> I wasn't sure if clients would be OK when some headers have CRC and some have not 
> (in the preliminary state). Also I'm not sure if clients are OK with the CRC bytes being 
> zero. 
> But if you're sure that all this is fine, I'll make this change...
> 
> softworkz

start_ebml_master_crc32() only reserves the space needed for the CRC32 element,
which is then written by end_ebml_master_crc32(). The preliminary header will
have a couple six bytes long Void elements that every parser will promptly
ignore.

CRC32 elements on this preliminarily state are pointless. Extra cycles wasted
calculating something that will be overwritten and recalculated at the end of
the process.
Soft Works Jan. 6, 2017, 8:39 p.m. UTC | #4
> James Almer wrote
> 
> 
> start_ebml_master_crc32() only reserves the space needed for the CRC32 element,
> which is then written by end_ebml_master_crc32(). The preliminary header will
> have a couple six bytes long Void elements that every parser will promptly
> ignore.
> 
> CRC32 elements on this preliminarily state are pointless. Extra cycles wasted
> calculating something that will be overwritten and recalculated at the end of
> the process.

Thank you very much for the clarification. I made the change and checked the resulting
headers. I'll submit it once fate is done.

Now I hope someone will review the avio change.

Thanks again,

softworkz
diff mbox

Patch

diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 827d755..27d83a6 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -367,6 +367,28 @@  static void end_ebml_master_crc32(AVIOContext *pb, AVIOContext **dyn_cp, Matrosk
     *dyn_cp = NULL;
 }
 
+/**
+* Complete ebml master whithout destroying the buffer, allowing for later updates
+*/
+static void end_ebml_master_crc32_preliminary(AVIOContext *pb, AVIOContext **dyn_cp, MatroskaMuxContext *mkv,
+    ebml_master master)
+{
+    uint8_t *buf, crc[4];
+    int size, skip = 0;
+
+    if (pb->seekable) {
+
+        size = avio_get_dyn_buf(*dyn_cp, &buf);
+        if (mkv->write_crc && mkv->mode != MODE_WEBM) {
+            skip = 6; /* Skip reserved 6-byte long void element from the dynamic buffer. */
+            AV_WL32(crc, av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), UINT32_MAX, buf + skip, size - skip) ^ UINT32_MAX);
+            put_ebml_binary(pb, EBML_ID_CRC32, crc, sizeof(crc));
+        }
+        avio_write(pb, buf + skip, size - skip);
+        end_ebml_master(pb, master);
+    }
+}
+
 static void put_xiph_size(AVIOContext *pb, int size)
 {
     ffio_fill(pb, 255, size / 255);
@@ -1309,7 +1331,7 @@  static int mkv_write_tracks(AVFormatContext *s)
     }
 
     if (pb->seekable && !mkv->is_live)
-        put_ebml_void(pb, avio_tell(mkv->tracks_bc));
+        end_ebml_master_crc32_preliminary(pb, &mkv->tracks_bc, mkv, mkv->tracks_master);
     else
         end_ebml_master_crc32(pb, &mkv->tracks_bc, mkv, mkv->tracks_master);
 
@@ -1554,7 +1576,7 @@  static int mkv_write_tags(AVFormatContext *s)
 
     if (mkv->tags.pos) {
         if (s->pb->seekable && !mkv->is_live)
-            put_ebml_void(s->pb, avio_tell(mkv->tags_bc));
+            end_ebml_master_crc32_preliminary(s->pb, &mkv->tags_bc, mkv, mkv->tags);
         else
             end_ebml_master_crc32(s->pb, &mkv->tags_bc, mkv, mkv->tags);
     }
@@ -1811,7 +1833,7 @@  static int mkv_write_header(AVFormatContext *s)
         }
     }
     if (s->pb->seekable && !mkv->is_live)
-        put_ebml_void(s->pb, avio_tell(pb));
+        end_ebml_master_crc32_preliminary(s->pb, &mkv->info_bc, mkv, mkv->info);
     else
         end_ebml_master_crc32(s->pb, &mkv->info_bc, mkv, mkv->info);
     pb = s->pb;