diff mbox

[FFmpeg-devel] libavformat/dashdec: Fix for ticket 6658 (Dash demuxer segfault)

Message ID DM5PR22MB06810F458A072289AE0B5CAEFE060@DM5PR22MB0681.namprd22.prod.outlook.com
State Superseded
Headers show

Commit Message

Colin NG Dec. 26, 2017, 1:12 a.m. UTC
- Add function 'resolve_content_path' to propagate the baseURL from upper level nodes.
 * if no baseURL is available, the path of mpd file will be set as the baseURL.
- Remove checking for newly established connection.
- Establish the communication protocol in each connection rather than applying one protocol to all connection.
---
 libavformat/dashdec.c | 107 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 94 insertions(+), 13 deletions(-)

Comments

Tomas Härdin Dec. 26, 2017, 8:56 p.m. UTC | #1
tis 2017-12-26 klockan 01:12 +0000 skrev Colin NG:
> +static int resolve_content_path(AVFormatContext *s, const char *url, xmlNodePtr *baseurl_nodes, int n_baseurl_nodes) {
> +
> +    char *tmp_str = av_mallocz(MAX_URL_SIZE);
> +    char *path = av_mallocz(MAX_URL_SIZE);
> +    char *mpdName = NULL;
> +    xmlNodePtr  node = NULL;
> +    char *baseurl = NULL;
> +    char *root_url = NULL;
> +    char *text = NULL;
> +
> +    int isRootHttp = 0;
> +    char token ='/';
> +    int start =  0;
> +    int rootId = 0;
> +    int updated = 0;
> +    int size = 0;
> +    int i;
> +
> +    if (!tmp_str || !path) {
> +        updated = AVERROR(ENOMEM);
> +        goto end;
> +    }
> +
> +    av_strlcpy(tmp_str, url, strlen(url) + 1);

MAX_URL_SIZE maybe? Is there some way url can be too long?

> +    mpdName = strtok (tmp_str, "/");
> +    while (mpdName = strtok (NULL, "/")) {
> +        size = strlen(mpdName);
> +    }

strtok() isn't thread safe. I forget if we guarantee thread safeness at
this stage in lavf

> +
> +    av_strlcpy (path, url, strlen(url) - size + 1);

Similarly here. Plus strlen(url) being computed again

> +    for (rootId = n_baseurl_nodes - 1; rootId > 0; rootId --) {
> +        if (!(node = baseurl_nodes[rootId])) {
> +            continue;
> +        }
> +        if (ishttp(xmlNodeGetContent(node))) {
> +            break;
> +        }
> +    }
> +
> +    node = baseurl_nodes[rootId];
> +    baseurl = xmlNodeGetContent(node);
> +    root_url = (av_strcasecmp(baseurl, "")) ? baseurl : path;
> +    if (node) {
> +        xmlNodeSetContent(node, root_url);
> +    }
> +
> +    size = strlen(root_url);
> +    isRootHttp = ishttp(root_url);
> +
> +    if (root_url[size - 1] == token) {
> +        av_strlcat(root_url, "/", size + 2);
> +        size += 2;
> +    }
> +
> +    for (i = 0; i < n_baseurl_nodes; ++i) {
> +        if (i == rootId) {
> +            continue;
> +        }
> +        text = xmlNodeGetContent(baseurl_nodes[i]);
> +        if (text) {
> +            memset(tmp_str, 0, strlen(tmp_str));
> +            if (!ishttp(text) && isRootHttp) {
> +                av_strlcpy(tmp_str, root_url, size + 1);
> +            }
> +            start = (text[0] == token);
> +            av_strlcat(tmp_str, text + start, MAX_URL_SIZE);
> +            xmlNodeSetContent(baseurl_nodes[i], tmp_str);
> +            updated = 1;
> +            xmlFree(text);
> +        }
> +    }
> +
> +end:
> +    av_free(path);
> +    av_free(tmp_str);
> +    return updated;
> +
> +}

I don't really like lavf doing URL parsing/manipulation like this when
there's (supposedly) mature libraries to do it. Especially after this
year's Blackhat revelations of exploits making use of how whitespace,
unicode, slashes, percent encoding etc are handled differently in every
library [1]. But I'm also not familiar with DASH so can't say what
would be the typical thing to do

/Tomas

[1] https://www.blackhat.com/docs/us-17/thursday/us-17-Tsai-A-New-Era-O
f-SSRF-Exploiting-URL-Parser-In-Trending-Programming-Languages.pdf
diff mbox

Patch

diff --git a/libavformat/dashdec.c b/libavformat/dashdec.c
index 3798649..e41ba96 100644
--- a/libavformat/dashdec.c
+++ b/libavformat/dashdec.c
@@ -148,6 +148,11 @@  static uint64_t get_current_time_in_sec(void)
     return  av_gettime() / 1000000;
 }
 
+static int ishttp(char *url) {
+    const char *proto_name = avio_find_protocol_name(url);
+    return av_strstart(proto_name, "http", NULL);
+}
+
 static uint64_t get_utc_date_time_insec(AVFormatContext *s, const char *datetime)
 {
     struct tm timeinfo;
@@ -392,7 +397,8 @@  static int open_url(AVFormatContext *s, AVIOContext **pb, const char *url,
     else if (strcmp(proto_name, "file") || !strncmp(url, "file,", 5))
         return AVERROR_INVALIDDATA;
 
-    ret = s->io_open(s, pb, url, AVIO_FLAG_READ, &tmp);
+    av_freep(pb);
+    ret = avio_open2(pb, url, AVIO_FLAG_READ, c->interrupt_callback, &tmp);
     if (ret >= 0) {
         // update cookies on http response with setcookies.
         char *new_cookies = NULL;
@@ -639,6 +645,84 @@  static int parse_manifest_segmenttimeline(AVFormatContext *s, struct representat
     return 0;
 }
 
+static int resolve_content_path(AVFormatContext *s, const char *url, xmlNodePtr *baseurl_nodes, int n_baseurl_nodes) {
+
+    char *tmp_str = av_mallocz(MAX_URL_SIZE);
+    char *path = av_mallocz(MAX_URL_SIZE);
+    char *mpdName = NULL;
+    xmlNodePtr  node = NULL;
+    char *baseurl = NULL;
+    char *root_url = NULL;
+    char *text = NULL;
+
+    int isRootHttp = 0;
+    char token ='/';
+    int start =  0;
+    int rootId = 0;
+    int updated = 0;
+    int size = 0;
+    int i;
+
+    if (!tmp_str || !path) {
+        updated = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    av_strlcpy(tmp_str, url, strlen(url) + 1);
+    mpdName = strtok (tmp_str, "/");
+    while (mpdName = strtok (NULL, "/")) {
+        size = strlen(mpdName);
+    }
+
+    av_strlcpy (path, url, strlen(url) - size + 1);
+    for (rootId = n_baseurl_nodes - 1; rootId > 0; rootId --) {
+        if (!(node = baseurl_nodes[rootId])) {
+            continue;
+        }
+        if (ishttp(xmlNodeGetContent(node))) {
+            break;
+        }
+    }
+
+    node = baseurl_nodes[rootId];
+    baseurl = xmlNodeGetContent(node);
+    root_url = (av_strcasecmp(baseurl, "")) ? baseurl : path;
+    if (node) {
+        xmlNodeSetContent(node, root_url);
+    }
+
+    size = strlen(root_url);
+    isRootHttp = ishttp(root_url);
+
+    if (root_url[size - 1] == token) {
+        av_strlcat(root_url, "/", size + 2);
+        size += 2;
+    }
+
+    for (i = 0; i < n_baseurl_nodes; ++i) {
+        if (i == rootId) {
+            continue;
+        }
+        text = xmlNodeGetContent(baseurl_nodes[i]);
+        if (text) {
+            memset(tmp_str, 0, strlen(tmp_str));
+            if (!ishttp(text) && isRootHttp) {
+                av_strlcpy(tmp_str, root_url, size + 1);
+            }
+            start = (text[0] == token);
+            av_strlcat(tmp_str, text + start, MAX_URL_SIZE);
+            xmlNodeSetContent(baseurl_nodes[i], tmp_str);
+            updated = 1;
+            xmlFree(text);
+        }
+    }
+
+end:
+    av_free(path);
+    av_free(tmp_str);
+    return updated;
+
+}
 static int parse_manifest_representation(AVFormatContext *s, const char *url,
                                          xmlNodePtr node,
                                          xmlNodePtr adaptionset_node,
@@ -698,6 +782,11 @@  static int parse_manifest_representation(AVFormatContext *s, const char *url,
         baseurl_nodes[2] = adaptionset_baseurl_node;
         baseurl_nodes[3] = representation_baseurl_node;
 
+        ret = resolve_content_path(s, url, baseurl_nodes, 4);
+        if (ret == AVERROR(ENOMEM) || ret == 0) {
+            goto end;
+        }
+
         if (representation_segmenttemplate_node || fragment_template_node) {
             fragment_timeline_node = NULL;
             fragment_templates_tab[0] = representation_segmenttemplate_node;
@@ -993,6 +1082,9 @@  static int parse_manifest(AVFormatContext *s, const char *url, AVIOContext *in)
         }
 
         mpd_baseurl_node = find_child_node_by_name(node, "BaseURL");
+        if (!mpd_baseurl_node) {
+            mpd_baseurl_node = xmlNewNode(NULL, "BaseURL");
+        }
 
         // at now we can handle only one period, with the longest duration
         node = xmlFirstElementChild(node);
@@ -1315,6 +1407,7 @@  static int read_from_url(struct representation *pls, struct fragment *seg,
     } else {
         ret = avio_read(pls->input, buf, buf_size);
     }
+
     if (ret > 0)
         pls->cur_seg_offset += ret;
 
@@ -1343,18 +1436,6 @@  static int open_input(DASHContext *c, struct representation *pls, struct fragmen
         goto cleanup;
     }
 
-    /* Seek to the requested position. If this was a HTTP request, the offset
-     * should already be where want it to, but this allows e.g. local testing
-     * without a HTTP server. */
-    if (!ret && seg->url_offset) {
-        int64_t seekret = avio_seek(pls->input, seg->url_offset, SEEK_SET);
-        if (seekret < 0) {
-            av_log(pls->parent, AV_LOG_ERROR, "Unable to seek to offset %"PRId64" of DASH fragment '%s'\n", seg->url_offset, seg->url);
-            ret = (int) seekret;
-            ff_format_io_close(pls->parent, &pls->input);
-        }
-    }
-
 cleanup:
     av_dict_free(&opts);
     pls->cur_seg_offset = 0;