[FFmpeg-devel] libavformat/dashdec: Fix for ticket 6658 (Dash demuxer segfault)

Submitted by Colin NG on Dec. 4, 2017, 4:28 a.m.

Details

Message ID DM5PR22MB068130812B5B6F00FFF9CFA2FE3C0@DM5PR22MB0681.namprd22.prod.outlook.com
State New
Headers show

Commit Message

Colin NG Dec. 4, 2017, 4:28 a.m.
---
 libavformat/dashdec.c | 112 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 99 insertions(+), 13 deletions(-)

Comments

Steven Liu Dec. 4, 2017, 9:52 a.m.
2017-12-04 12:28 GMT+08:00 Colin NG <colin_ng@hotmail.com>:
> ---
>  libavformat/dashdec.c | 112 ++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 99 insertions(+), 13 deletions(-)
>
> diff --git a/libavformat/dashdec.c b/libavformat/dashdec.c
> index 3798649..d04bec0 100644
> --- a/libavformat/dashdec.c
> +++ b/libavformat/dashdec.c
> @@ -148,6 +148,11 @@ static uint64_t get_current_time_in_sec(void)
>      return  av_gettime() / 1000000;
>  }
>
> +static char * ishttp(char *url) {
> +    char *proto_name = avio_find_protocol_name(url);
> +    return av_strstart(proto_name, "http", NULL);
> +}
> +
>  static uint64_t get_utc_date_time_insec(AVFormatContext *s, const char *datetime)
>  {
>      struct tm timeinfo;
> @@ -392,7 +397,9 @@ static int open_url(AVFormatContext *s, AVIOContext **pb, const char *url,
>      else if (strcmp(proto_name, "file") || !strncmp(url, "file,", 5))
>          return AVERROR_INVALIDDATA;
>
> -    ret = s->io_open(s, pb, url, AVIO_FLAG_READ, &tmp);
> +    av_freep(pb);
> +    ret = avio_open2(pb, url, AVIO_FLAG_READ, c->interrupt_callback, &tmp);
> +
>      if (ret >= 0) {
>          // update cookies on http response with setcookies.
>          char *new_cookies = NULL;
> @@ -639,6 +646,87 @@ static int parse_manifest_segmenttimeline(AVFormatContext *s, struct representat
>      return 0;
>  }
>
> +static int resolve_content_path(AVFormatContext *s, const char *url,  xmlNodePtr *baseurl_nodes,  int n_baseurl_nodes) {
> +
> +    int i;
> +    char *text;
> +    char *tmp_str = av_mallocz(MAX_URL_SIZE);
> +    char *tmp_str_2= av_mallocz(MAX_URL_SIZE);
> +
> +    char *path = av_mallocz(MAX_URL_SIZE);
> +    int nameSize = 0;
> +    int updated = 0;
> +
> +    if (!tmp_str || !tmp_str_2 || !path) {
> +        updated = AVERROR(ENOMEM);
> +        goto end;
> +    }
> +
> +    av_strlcpy(tmp_str, url, strlen(url)+1);
> +    char *mpdName = strtok (tmp_str," /");
> +
> +    while ((mpdName =strtok (NULL, "/"))) {
What does this do?

> +        nameSize = strlen(mpdName);
> +    }
> +
> +    av_strlcpy (path, url, strlen(url)-nameSize+1);
> +
> +    int rootId = 0;
> +    xmlNodePtr  *node = NULL;
> +    for (rootId = n_baseurl_nodes-1; rootId >0; rootId--) {
> +        if (!(node = baseurl_nodes[rootId])) {
> +            continue;
> +        }
> +        if (ishttp(xmlNodeGetContent(node))) {
> +            break;
> +        }
> +    }
> +
> +    node = baseurl_nodes[rootId];
> +    char *baseurl = xmlNodeGetContent(node);
> +    char *root_url = (!av_strcasecmp(baseurl, ""))? path: baseurl;
> +
> +    if (node) {
> +        xmlNodeSetContent(node, root_url);
> +    }
> +
> +    int size = strlen(root_url);
> +    char *isRootHttp= ishttp(root_url);
> +
> +    char token ='/';
> +    //if (root_url[size-1]==token) {
> +    if (av_strncasecmp(&root_url[size-1],&token, 1) != 0) {
If this only compares 1 char, why not use root_url[size-1] == token?
> +        av_strlcat(root_url, "/", size+2);
> +        size+=2;
> +    }
> +
> +    for (i = 0; i < n_baseurl_nodes; ++i) {
> +        if (i==rootId) {
> +            continue;
> +        }
> +        text = xmlNodeGetContent(baseurl_nodes[i]);
> +        if (text) {
> +            memset(tmp_str, 0, strlen(tmp_str));
> +
> +            if (!ishttp(text) && isRootHttp) {
> +                av_strlcpy(tmp_str, root_url, size+1);
> +            }
> +            int start = (text[0]==token) ? 1: 0;
> +            memset(tmp_str_2, 0, strlen(tmp_str_2));
> +            av_strlcat(tmp_str, text+start, MAX_URL_SIZE);
> +            xmlFree(text);
> +            xmlNodeSetContent(baseurl_nodes[i], tmp_str);
> +            updated = 1;
> +        }
> +    }
> +
> +end:
> +    av_free(path);
> +    av_free(tmp_str);
> +    av_free(tmp_str_2);
> +    return updated;
> +
> +}
>  static int parse_manifest_representation(AVFormatContext *s, const char *url,
>                                           xmlNodePtr node,
>                                           xmlNodePtr adaptionset_node,
> @@ -698,6 +786,12 @@ static int parse_manifest_representation(AVFormatContext *s, const char *url,
>          baseurl_nodes[2] = adaptionset_baseurl_node;
>          baseurl_nodes[3] = representation_baseurl_node;
>
> +        ret = resolve_content_path(s, url, baseurl_nodes, 4);
> +
> +        if (ret == AVERROR(ENOMEM)) {
What about ret < 0 ?
> +            goto end;
> +        }
> +
>          if (representation_segmenttemplate_node || fragment_template_node) {
>              fragment_timeline_node = NULL;
>              fragment_templates_tab[0] = representation_segmenttemplate_node;
> @@ -993,6 +1087,9 @@ static int parse_manifest(AVFormatContext *s, const char *url, AVIOContext *in)
>          }
>
>          mpd_baseurl_node = find_child_node_by_name(node, "BaseURL");
> +        if (!mpd_baseurl_node) {
> +            mpd_baseurl_node = xmlNewNode(node, "BaseURL");
> +        }
>
>          // at now we can handle only one period, with the longest duration
>          node = xmlFirstElementChild(node);
> @@ -1315,6 +1412,7 @@ static int read_from_url(struct representation *pls, struct fragment *seg,
>      } else {
>          ret = avio_read(pls->input, buf, buf_size);
>      }
> +
>      if (ret > 0)
>          pls->cur_seg_offset += ret;
>
> @@ -1343,18 +1441,6 @@ static int open_input(DASHContext *c, struct representation *pls, struct fragmen
>          goto cleanup;
>      }
>
> -    /* Seek to the requested position. If this was a HTTP request, the offset
> -     * should already be where want it to, but this allows e.g. local testing
> -     * without a HTTP server. */
> -    if (!ret && seg->url_offset) {
> -        int64_t seekret = avio_seek(pls->input, seg->url_offset, SEEK_SET);
> -        if (seekret < 0) {
> -            av_log(pls->parent, AV_LOG_ERROR, "Unable to seek to offset %"PRId64" of DASH fragment '%s'\n", seg->url_offset, seg->url);
> -            ret = (int) seekret;
> -            ff_format_io_close(pls->parent, &pls->input);
> -        }
> -    }
> -
>  cleanup:
>      av_dict_free(&opts);
>      pls->cur_seg_offset = 0;
> --
> 2.7.4
>

Thanks


Steven
Derek Buitenhuis Dec. 4, 2017, 2:01 p.m.
On 12/4/2017 4:28 AM, Colin NG wrote:
> ---
>  libavformat/dashdec.c | 112 ++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 99 insertions(+), 13 deletions(-)

Please describe what is actually being changed, and why, in the
commit message. It is both hard to review with no description,
and incredibly annoying to git blame later, without a proper
commit message. For example, a bunch of these changes seem
pretty disparate, hence my 'Why?' after several.

> +static char * ishttp(char *url) {
> +    char *proto_name = avio_find_protocol_name(url);
> +    return av_strstart(proto_name, "http", NULL);
> +}

Is the URL guaranteed to have a known, and enabled (in avformat) protocol?
If not, then this can crash, because avio_find_protocol_name will return
NULL.

> -    ret = s->io_open(s, pb, url, AVIO_FLAG_READ, &tmp);
> +    av_freep(pb);
> +    ret = avio_open2(pb, url, AVIO_FLAG_READ, c->interrupt_callback, &tmp);

Why?
 
> +static int resolve_content_path(AVFormatContext *s, const char *url,  xmlNodePtr *baseurl_nodes,  int n_baseurl_nodes) {
> +
> +    int i;
> +    char *text;
> +    char *tmp_str = av_mallocz(MAX_URL_SIZE);
> +    char *tmp_str_2= av_mallocz(MAX_URL_SIZE);
> +
> +    char *path = av_mallocz(MAX_URL_SIZE);

If the size is known (and not massive), why are these heap allocations?

> +    int nameSize = 0;
> +    int updated = 0;
> +
> +    if (!tmp_str || !tmp_str_2 || !path) {
> +        updated = AVERROR(ENOMEM);
> +        goto end;
> +    }
> +
> +    av_strlcpy(tmp_str, url, strlen(url)+1);
> +    char *mpdName = strtok (tmp_str," /");

Don't mix declarations and code.

> +
> +    while ((mpdName =strtok (NULL, "/"))) {
> +        nameSize = strlen(mpdName);
> +    }
> +
> +    av_strlcpy (path, url, strlen(url)-nameSize+1);
> +
> +    int rootId = 0;
> +    xmlNodePtr  *node = NULL;

Ditto.

> +    for (rootId = n_baseurl_nodes-1; rootId >0; rootId--) {
> +        if (!(node = baseurl_nodes[rootId])) {
> +            continue;
> +        }
> +        if (ishttp(xmlNodeGetContent(node))) {
> +            break;
> +        }
> +    }
> +
> +    node = baseurl_nodes[rootId];
> +    char *baseurl = xmlNodeGetContent(node);
> +    char *root_url = (!av_strcasecmp(baseurl, ""))? path: baseurl;

Ditto.

Also should all of these calls to the XML lib have checks? My gut says 'yes'.

> +
> +    if (node) {
> +        xmlNodeSetContent(node, root_url);
> +    }
> +
> +    int size = strlen(root_url);
> +    char *isRootHttp= ishttp(root_url);
> +
> +    char token ='/';

Ditto.

> +    //if (root_url[size-1]==token) {
> +    if (av_strncasecmp(&root_url[size-1],&token, 1) != 0) {
> +        av_strlcat(root_url, "/", size+2);
> +        size+=2;
> +    }
> +
> +    for (i = 0; i < n_baseurl_nodes; ++i) {
> +        if (i==rootId) {
> +            continue;
> +        }
> +        text = xmlNodeGetContent(baseurl_nodes[i]);
> +        if (text) {
> +            memset(tmp_str, 0, strlen(tmp_str));
> +
> +            if (!ishttp(text) && isRootHttp) {
> +                av_strlcpy(tmp_str, root_url, size+1);
> +            }
> +            int start = (text[0]==token) ? 1: 0;

Mixed code and variable declarations. Also, redundant ternary operation.

> +            memset(tmp_str_2, 0, strlen(tmp_str_2));
> +            av_strlcat(tmp_str, text+start, MAX_URL_SIZE);
> +            xmlFree(text);
> +            xmlNodeSetContent(baseurl_nodes[i], tmp_str);
> +            updated = 1;

What's with the odd 0/1 return values, which are not even
checked anywhere?

> +        }
> +    }
> +
> +end:
> +    av_free(path);
> +    av_free(tmp_str);
> +    av_free(tmp_str_2);
> +    return updated;
> +
> +}
>  static int parse_manifest_representation(AVFormatContext *s, const char *url,
>                                           xmlNodePtr node,
>                                           xmlNodePtr adaptionset_node,
> @@ -698,6 +786,12 @@ static int parse_manifest_representation(AVFormatContext *s, const char *url,
>          baseurl_nodes[2] = adaptionset_baseurl_node;
>          baseurl_nodes[3] = representation_baseurl_node;
>  
> +        ret = resolve_content_path(s, url, baseurl_nodes, 4);
> +
> +        if (ret == AVERROR(ENOMEM)) {
> +            goto end;
> +        }

This kind of check seems very wrong. Check for ret < 0.

>          mpd_baseurl_node = find_child_node_by_name(node, "BaseURL");
> +        if (!mpd_baseurl_node) {
> +            mpd_baseurl_node = xmlNewNode(node, "BaseURL");
> +        }

Why? Also missing check for xmlNewNode ret value?

>  
>          // at now we can handle only one period, with the longest duration
>          node = xmlFirstElementChild(node);
> @@ -1315,6 +1412,7 @@ static int read_from_url(struct representation *pls, struct fragment *seg,
>      } else {
>          ret = avio_read(pls->input, buf, buf_size);
>      }
> +
>      if (ret > 0)

Stray change.

> -    /* Seek to the requested position. If this was a HTTP request, the offset
> -     * should already be where want it to, but this allows e.g. local testing
> -     * without a HTTP server. */
> -    if (!ret && seg->url_offset) {
> -        int64_t seekret = avio_seek(pls->input, seg->url_offset, SEEK_SET);
> -        if (seekret < 0) {
> -            av_log(pls->parent, AV_LOG_ERROR, "Unable to seek to offset %"PRId64" of DASH fragment '%s'\n", seg->url_offset, seg->url);
> -            ret = (int) seekret;
> -            ff_format_io_close(pls->parent, &pls->input);
> -        }
> -    }

Why?

- Derek
Colin NG Dec. 5, 2017, 11:57 p.m.

Patch hide | download patch | download mbox

diff --git a/libavformat/dashdec.c b/libavformat/dashdec.c
index 3798649..d04bec0 100644
--- a/libavformat/dashdec.c
+++ b/libavformat/dashdec.c
@@ -148,6 +148,11 @@  static uint64_t get_current_time_in_sec(void)
     return  av_gettime() / 1000000;
 }
 
+static char * ishttp(char *url) {
+    char *proto_name = avio_find_protocol_name(url);
+    return av_strstart(proto_name, "http", NULL);
+}
+
 static uint64_t get_utc_date_time_insec(AVFormatContext *s, const char *datetime)
 {
     struct tm timeinfo;
@@ -392,7 +397,9 @@  static int open_url(AVFormatContext *s, AVIOContext **pb, const char *url,
     else if (strcmp(proto_name, "file") || !strncmp(url, "file,", 5))
         return AVERROR_INVALIDDATA;
 
-    ret = s->io_open(s, pb, url, AVIO_FLAG_READ, &tmp);
+    av_freep(pb);
+    ret = avio_open2(pb, url, AVIO_FLAG_READ, c->interrupt_callback, &tmp);
+
     if (ret >= 0) {
         // update cookies on http response with setcookies.
         char *new_cookies = NULL;
@@ -639,6 +646,87 @@  static int parse_manifest_segmenttimeline(AVFormatContext *s, struct representat
     return 0;
 }
 
+static int resolve_content_path(AVFormatContext *s, const char *url,  xmlNodePtr *baseurl_nodes,  int n_baseurl_nodes) {
+
+    int i;
+    char *text;
+    char *tmp_str = av_mallocz(MAX_URL_SIZE);
+    char *tmp_str_2= av_mallocz(MAX_URL_SIZE);
+
+    char *path = av_mallocz(MAX_URL_SIZE);
+    int nameSize = 0;
+    int updated = 0;
+
+    if (!tmp_str || !tmp_str_2 || !path) {
+        updated = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    av_strlcpy(tmp_str, url, strlen(url)+1);
+    char *mpdName = strtok (tmp_str," /");
+
+    while ((mpdName =strtok (NULL, "/"))) {
+        nameSize = strlen(mpdName);
+    }
+
+    av_strlcpy (path, url, strlen(url)-nameSize+1);
+
+    int rootId = 0;
+    xmlNodePtr  *node = NULL;
+    for (rootId = n_baseurl_nodes-1; rootId >0; rootId--) {
+        if (!(node = baseurl_nodes[rootId])) {
+            continue;
+        }
+        if (ishttp(xmlNodeGetContent(node))) {
+            break;
+        }
+    }
+
+    node = baseurl_nodes[rootId];
+    char *baseurl = xmlNodeGetContent(node);
+    char *root_url = (!av_strcasecmp(baseurl, ""))? path: baseurl;
+
+    if (node) {
+        xmlNodeSetContent(node, root_url);
+    }
+
+    int size = strlen(root_url);
+    char *isRootHttp= ishttp(root_url);
+
+    char token ='/';
+    //if (root_url[size-1]==token) {
+    if (av_strncasecmp(&root_url[size-1],&token, 1) != 0) {
+        av_strlcat(root_url, "/", size+2);
+        size+=2;
+    }
+
+    for (i = 0; i < n_baseurl_nodes; ++i) {
+        if (i==rootId) {
+            continue;
+        }
+        text = xmlNodeGetContent(baseurl_nodes[i]);
+        if (text) {
+            memset(tmp_str, 0, strlen(tmp_str));
+
+            if (!ishttp(text) && isRootHttp) {
+                av_strlcpy(tmp_str, root_url, size+1);
+            }
+            int start = (text[0]==token) ? 1: 0;
+            memset(tmp_str_2, 0, strlen(tmp_str_2));
+            av_strlcat(tmp_str, text+start, MAX_URL_SIZE);
+            xmlFree(text);
+            xmlNodeSetContent(baseurl_nodes[i], tmp_str);
+            updated = 1;
+        }
+    }
+
+end:
+    av_free(path);
+    av_free(tmp_str);
+    av_free(tmp_str_2);
+    return updated;
+
+}
 static int parse_manifest_representation(AVFormatContext *s, const char *url,
                                          xmlNodePtr node,
                                          xmlNodePtr adaptionset_node,
@@ -698,6 +786,12 @@  static int parse_manifest_representation(AVFormatContext *s, const char *url,
         baseurl_nodes[2] = adaptionset_baseurl_node;
         baseurl_nodes[3] = representation_baseurl_node;
 
+        ret = resolve_content_path(s, url, baseurl_nodes, 4);
+
+        if (ret == AVERROR(ENOMEM)) {
+            goto end;
+        }
+
         if (representation_segmenttemplate_node || fragment_template_node) {
             fragment_timeline_node = NULL;
             fragment_templates_tab[0] = representation_segmenttemplate_node;
@@ -993,6 +1087,9 @@  static int parse_manifest(AVFormatContext *s, const char *url, AVIOContext *in)
         }
 
         mpd_baseurl_node = find_child_node_by_name(node, "BaseURL");
+        if (!mpd_baseurl_node) {
+            mpd_baseurl_node = xmlNewNode(node, "BaseURL");
+        }
 
         // at now we can handle only one period, with the longest duration
         node = xmlFirstElementChild(node);
@@ -1315,6 +1412,7 @@  static int read_from_url(struct representation *pls, struct fragment *seg,
     } else {
         ret = avio_read(pls->input, buf, buf_size);
     }
+
     if (ret > 0)
         pls->cur_seg_offset += ret;
 
@@ -1343,18 +1441,6 @@  static int open_input(DASHContext *c, struct representation *pls, struct fragmen
         goto cleanup;
     }
 
-    /* Seek to the requested position. If this was a HTTP request, the offset
-     * should already be where want it to, but this allows e.g. local testing
-     * without a HTTP server. */
-    if (!ret && seg->url_offset) {
-        int64_t seekret = avio_seek(pls->input, seg->url_offset, SEEK_SET);
-        if (seekret < 0) {
-            av_log(pls->parent, AV_LOG_ERROR, "Unable to seek to offset %"PRId64" of DASH fragment '%s'\n", seg->url_offset, seg->url);
-            ret = (int) seekret;
-            ff_format_io_close(pls->parent, &pls->input);
-        }
-    }
-
 cleanup:
     av_dict_free(&opts);
     pls->cur_seg_offset = 0;