diff mbox

[FFmpeg-devel] ffmpeg_opt: remove the stats metadata added by mkvmerge.

Message ID 20170406102743.20965-1-george@nsup.org
State New
Headers show

Commit Message

Nicolas George April 6, 2017, 10:27 a.m. UTC
They are probably not valid for the resulting file.
They look like this:
      BPS             : 40665
      BPS-eng         : 40665
      DURATION        : 00:00:20.000000000
      DURATION-eng    : 00:00:20.000000000
      NUMBER_OF_FRAMES: 10
      NUMBER_OF_FRAMES-eng: 10
      NUMBER_OF_BYTES : 101664
      NUMBER_OF_BYTES-eng: 101664
      _STATISTICS_WRITING_APP: mkvmerge v9.8.0 ('Kuglblids') 64bit
      _STATISTICS_WRITING_APP-eng: mkvmerge v9.8.0 ('Kuglblids') 64bit
      _STATISTICS_WRITING_DATE_UTC: 2017-04-06 08:22:08
      _STATISTICS_WRITING_DATE_UTC-eng: 2017-04-06 08:22:08
      _STATISTICS_TAGS: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES
      _STATISTICS_TAGS-eng: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES

Signed-off-by: Nicolas George <george@nsup.org>
---
 ffmpeg_opt.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)


Note: as written in a comment, this code ignores the return value of
av_dict_set(), because the surrounding code ignores the return value of
av_dict_copy(). A lot of unchecked errors happen in this function, but I am
not reworking a 2700-lines function today.

Comments

wm4 April 6, 2017, 11:01 a.m. UTC | #1
On Thu,  6 Apr 2017 12:27:43 +0200
Nicolas George <george@nsup.org> wrote:

> They are probably not valid for the resulting file.
> They look like this:
>       BPS             : 40665
>       BPS-eng         : 40665
>       DURATION        : 00:00:20.000000000
>       DURATION-eng    : 00:00:20.000000000
>       NUMBER_OF_FRAMES: 10
>       NUMBER_OF_FRAMES-eng: 10
>       NUMBER_OF_BYTES : 101664
>       NUMBER_OF_BYTES-eng: 101664
>       _STATISTICS_WRITING_APP: mkvmerge v9.8.0 ('Kuglblids') 64bit
>       _STATISTICS_WRITING_APP-eng: mkvmerge v9.8.0 ('Kuglblids') 64bit
>       _STATISTICS_WRITING_DATE_UTC: 2017-04-06 08:22:08
>       _STATISTICS_WRITING_DATE_UTC-eng: 2017-04-06 08:22:08
>       _STATISTICS_TAGS: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES
>       _STATISTICS_TAGS-eng: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES
> 
> Signed-off-by: Nicolas George <george@nsup.org>
> ---
>  ffmpeg_opt.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 48 insertions(+), 1 deletion(-)
> 
> 
> Note: as written in a comment, this code ignores the return value of
> av_dict_set(), because the surrounding code ignores the return value of
> av_dict_copy(). A lot of unchecked errors happen in this function, but I am
> not reworking a 2700-lines function today.
> 
> 
> diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
> index d1fe8742ff..f72f38796a 100644
> --- a/ffmpeg_opt.c
> +++ b/ffmpeg_opt.c
> @@ -497,6 +497,53 @@ static void parse_meta_type(char *arg, char *type, int *index, const char **stre
>          *type = 'g';
>  }
>  
> +/**
> + * Copy a dictionary except the stats metadata added by mkvmerge.
> + * They are probably not valid for the resulting file.
> + *
> + * FIXME Return values are ignored by the surrounding code and here.
> + */
> +static void dict_copy_nostats(AVDictionary **dst, const AVDictionary *src)
> +{
> +    AVDictionaryEntry *t;
> +    const char *tags = NULL, *p, *q;
> +    size_t len;
> +
> +    t = av_dict_get(src, "_STATISTICS_TAGS", NULL, AV_DICT_MATCH_CASE);
> +    if (t)
> +        tags = t->value;
> +    t = NULL;
> +    while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX))) {
> +        if (av_strstart(t->key, "_STATISTICS_", NULL))
> +            continue;
> +        if (tags) {
> +            /* tags is a space-separated list of stats tags */
> +            p = tags;
> +            while (*p) {
> +                q = strchr(p, ' ');
> +                len = q ? q - p : strlen(p);
> +                if (!q)
> +                    q = p + strlen(p);
> +#define ff_islower(c) ((unsigned)((c) - 'a') < 26)
> +                if (!memcmp(t->key, p, len) &&
> +                    (!t->key[len] ||
> +                     /* also remove TAG-lng */
> +                     (t->key[len] == '-' &&
> +                      ff_islower(t->key[len + 1]) &&
> +                      ff_islower(t->key[len + 2]) &&
> +                      ff_islower(t->key[len + 3]) &&
> +                      !t->key[len + 4])))
> +                    break;
> +#undef ff_islower
> +                for (p += len; *p == ' '; p++);
> +            }
> +            if (*p) /* match found => do not copy */
> +                continue;
> +        }
> +        av_dict_set(dst, t->key, t->value, AV_DICT_DONT_OVERWRITE);
> +    }
> +}
> +
>  static int copy_metadata(char *outspec, char *inspec, AVFormatContext *oc, AVFormatContext *ic, OptionsContext *o)
>  {
>      AVDictionary **meta_in = NULL;
> @@ -2546,7 +2593,7 @@ loop_end:
>              if (output_streams[i]->source_index < 0)         /* this is true e.g. for attached files */
>                  continue;
>              ist = input_streams[output_streams[i]->source_index];
> -            av_dict_copy(&output_streams[i]->st->metadata, ist->st->metadata, AV_DICT_DONT_OVERWRITE);
> +            dict_copy_nostats(&output_streams[i]->st->metadata, ist->st->metadata);
>              if (!output_streams[i]->stream_copy) {
>                  av_dict_set(&output_streams[i]->st->metadata, "encoder", NULL, 0);
>              }

I sure love format-specific hacks in ffmpeg CLI!

I don't think this patch is acceptable.

I went on trying to find out how Matroska readers are supposed to
handle this:

<wm4> mosu: what's the correct way to discard _STATISTICS tags if you want only "actual" tags the user added?
<wm4> matching by name doesn't work, because there are many names to be considered and they don't all start with _ or _STATISTICS
<mosu> You mean in the context of programs other than mkvmerge?
<wm4> yes
<mosu> mkvmerge adds one tag called _STATISTICS_TAGS which includes the a space-separated list of names of "normal" tags it has added. Additionally it adds _STATISTICS_WRITING_APP and _STATISTICS_WRITING_DATE.
<wm4> that's... painful
<mosu> There's nothing in the specs about this, though; it's a convention created by/invented for mkvmerge

So that's what you have to implement in the demuxer.
diff mbox

Patch

diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
index d1fe8742ff..f72f38796a 100644
--- a/ffmpeg_opt.c
+++ b/ffmpeg_opt.c
@@ -497,6 +497,53 @@  static void parse_meta_type(char *arg, char *type, int *index, const char **stre
         *type = 'g';
 }
 
+/**
+ * Copy a dictionary except the stats metadata added by mkvmerge.
+ * They are probably not valid for the resulting file.
+ *
+ * FIXME Return values are ignored by the surrounding code and here.
+ */
+static void dict_copy_nostats(AVDictionary **dst, const AVDictionary *src)
+{
+    AVDictionaryEntry *t;
+    const char *tags = NULL, *p, *q;
+    size_t len;
+
+    t = av_dict_get(src, "_STATISTICS_TAGS", NULL, AV_DICT_MATCH_CASE);
+    if (t)
+        tags = t->value;
+    t = NULL;
+    while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX))) {
+        if (av_strstart(t->key, "_STATISTICS_", NULL))
+            continue;
+        if (tags) {
+            /* tags is a space-separated list of stats tags */
+            p = tags;
+            while (*p) {
+                q = strchr(p, ' ');
+                len = q ? q - p : strlen(p);
+                if (!q)
+                    q = p + strlen(p);
+#define ff_islower(c) ((unsigned)((c) - 'a') < 26)
+                if (!memcmp(t->key, p, len) &&
+                    (!t->key[len] ||
+                     /* also remove TAG-lng */
+                     (t->key[len] == '-' &&
+                      ff_islower(t->key[len + 1]) &&
+                      ff_islower(t->key[len + 2]) &&
+                      ff_islower(t->key[len + 3]) &&
+                      !t->key[len + 4])))
+                    break;
+#undef ff_islower
+                for (p += len; *p == ' '; p++);
+            }
+            if (*p) /* match found => do not copy */
+                continue;
+        }
+        av_dict_set(dst, t->key, t->value, AV_DICT_DONT_OVERWRITE);
+    }
+}
+
 static int copy_metadata(char *outspec, char *inspec, AVFormatContext *oc, AVFormatContext *ic, OptionsContext *o)
 {
     AVDictionary **meta_in = NULL;
@@ -2546,7 +2593,7 @@  loop_end:
             if (output_streams[i]->source_index < 0)         /* this is true e.g. for attached files */
                 continue;
             ist = input_streams[output_streams[i]->source_index];
-            av_dict_copy(&output_streams[i]->st->metadata, ist->st->metadata, AV_DICT_DONT_OVERWRITE);
+            dict_copy_nostats(&output_streams[i]->st->metadata, ist->st->metadata);
             if (!output_streams[i]->stream_copy) {
                 av_dict_set(&output_streams[i]->st->metadata, "encoder", NULL, 0);
             }