From patchwork Thu Jul 30 15:10:09 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Nicolas George X-Patchwork-Id: 21386 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id E793F44AACC for ; Thu, 30 Jul 2020 18:10:19 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id C67DD68B52B; Thu, 30 Jul 2020 18:10:19 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from nef.ens.fr (nef2.ens.fr [129.199.96.40]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 033A0680092 for ; Thu, 30 Jul 2020 18:10:12 +0300 (EEST) X-ENS-nef-client: 129.199.129.80 ( name = phare.normalesup.org ) Received: from phare.normalesup.org (phare.normalesup.org [129.199.129.80]) by nef.ens.fr (8.14.4/1.01.28121999) with ESMTP id 06UFACYd001466 for ; Thu, 30 Jul 2020 17:10:12 +0200 Received: by phare.normalesup.org (Postfix, from userid 1001) id 26AB9E00CF; Thu, 30 Jul 2020 17:10:12 +0200 (CEST) From: Nicolas George To: ffmpeg-devel@ffmpeg.org Date: Thu, 30 Jul 2020 17:10:09 +0200 Message-Id: <20200730151009.118835-2-george@nsup.org> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20200730151009.118835-1-george@nsup.org> References: <20200730151009.118835-1-george@nsup.org> MIME-Version: 1.0 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.4.3 (nef.ens.fr [129.199.96.32]); Thu, 30 Jul 2020 17:10:12 +0200 (CEST) Subject: [FFmpeg-devel] [PATCH 2/2] lavf/url: rewrite ff_make_absolute_url() using ff_url_decompose(). X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Also add and update some tests. Change the semantic a little, because for filesytem paths symlinks complicate things. See the comments in the code for detail. Signed-off-by: Nicolas George --- libavformat/tests/url.c | 17 ++- libavformat/url.c | 246 +++++++++++++++++++--------------------- libavformat/url.h | 4 +- tests/ref/fate/url | 13 ++- 4 files changed, 145 insertions(+), 135 deletions(-) diff --git a/libavformat/tests/url.c b/libavformat/tests/url.c index e7d259ab7d..ecd9725112 100644 --- a/libavformat/tests/url.c +++ b/libavformat/tests/url.c @@ -49,7 +49,13 @@ static void test_decompose(const char *url) static void test(const char *base, const char *rel) { char buf[200], buf2[200]; - ff_make_absolute_url(buf, sizeof(buf), base, rel); + int ret; + + ret = ff_make_absolute_url(buf, sizeof(buf), base, rel); + if (ret < 0) { + printf("%50s %-20s => error %s\n", base, rel, av_err2str(ret)); + return; + } printf("%50s %-20s => %s\n", base, rel, buf); if (base) { /* Test in-buffer replacement */ @@ -104,6 +110,15 @@ int main(void) test("http://server/foo/bar", "/../../../../../other/url"); test("http://server/foo/bar", "/test/../../../../../other/url"); test("http://server/foo/bar", "/test/../../test/../../../other/url"); + test("http://server/foo/bar", "file:../baz/qux"); + test("http://server/foo//bar/", "../../"); + test("file:../tmp/foo", "../bar/"); + test("file:../tmp/foo", "file:../bar/"); + test("http://server/foo/bar", "./"); + test("http://server/foo/bar", ".dotfile"); + test("http://server/foo/bar", "..doubledotfile"); + test("http://server/foo/bar", "double..dotfile"); + test("http://server/foo/bar", "doubledotfile.."); printf("\nTesting av_url_split:\n"); test2("/foo/bar"); diff --git a/libavformat/url.c b/libavformat/url.c index 26aaab4019..0c82317134 100644 --- a/libavformat/url.c +++ b/libavformat/url.c @@ -27,6 +27,7 @@ #if CONFIG_NETWORK #include "network.h" #endif +#include "libavutil/avassert.h" #include "libavutil/avstring.h" /** @@ -152,146 +153,131 @@ int ff_url_decompose(URLComponents *uc, const char *url, const char *end) return 0; } -static void trim_double_dot_url(char *buf, const char *rel, int size) +static int append_path(char *root, char *out_end, char **rout, + const char *in, const char *in_end) { - const char *p = rel; - const char *root = rel; - char tmp_path[MAX_URL_SIZE] = {0, }; - char *sep; - char *node; - - /* Get the path root of the url which start by "://" */ - if (p && (sep = strstr(p, "://"))) { - sep += 3; - root = strchr(sep, '/'); - if (!root) - return; - } - - /* set new current position if the root node is changed */ - p = root; - while (p && (node = strstr(p, ".."))) { - av_strlcat(tmp_path, p, node - p + strlen(tmp_path)); - p = node + 3; - sep = strrchr(tmp_path, '/'); - if (sep) - sep[0] = '\0'; - else - tmp_path[0] = '\0'; + char *out = *rout; + const char *d, *next; + + if (in < in_end && *in == '/') + in++; /* already taken care of */ + while (in < in_end) { + d = find_delim("/", in, in_end); + next = d + (d < in_end && *d == '/'); + if (d - in == 1 && in[0] == '.') { + /* skip */ + } else if (d - in == 2 && in[0] == '.' && in[1] == '.') { + av_assert1(out[-1] == '/'); + if (out - root > 1) + while (out > root && (--out)[-1] != '/'); + } else { + if (out_end - out < next - in) + return AVERROR(ENOMEM); + memcpy(out, in, next - in); + out += next - in; + } + in = next; } - - if (!av_stristart(p, "/", NULL) && root != rel) - av_strlcat(tmp_path, "/", size); - - av_strlcat(tmp_path, p, size); - /* start set buf after temp path process. */ - av_strlcpy(buf, rel, root - rel + 1); - - if (!av_stristart(tmp_path, "/", NULL) && root != rel) - av_strlcat(buf, "/", size); - - av_strlcat(buf, tmp_path, size); + *rout = out; + return 0; } -void ff_make_absolute_url(char *buf, int size, const char *base, +int ff_make_absolute_url(char *buf, int size, const char *base, const char *rel) { - char *sep, *path_query; - char *root, *p; - char tmp_path[MAX_URL_SIZE]; - - memset(tmp_path, 0, sizeof(tmp_path)); - /* Absolute path, relative to the current server */ - if (base && strstr(base, "://") && rel[0] == '/') { - if (base != buf) - av_strlcpy(buf, base, size); - sep = strstr(buf, "://"); - if (sep) { - /* Take scheme from base url */ - if (rel[1] == '/') { - sep[1] = '\0'; - } else { - /* Take scheme and host from base url */ - sep += 3; - sep = strchr(sep, '/'); - if (sep) - *sep = '\0'; - } - } - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); - return; - } - /* If rel actually is an absolute url, just copy it */ - if (!base || strstr(rel, "://") || rel[0] == '/') { - memset(buf, 0, size); - trim_double_dot_url(buf, rel, size); - return; + URLComponents ub, uc; + char *out, *out_end, *path; + const char *keep, *base_path_end; + int use_base_path, simplify_path = 0, ret; + + /* This is tricky. + For HTTP, http://server/site/page + ../media/file + should resolve into http://media/file + but for filesystem access, dir/playlist + ../media/file + should resolve into dir/../media/file + because dir could be a symlink, and .. points to + the actual parent of the target directory. + + We'll consider that URLs with an actual scheme and authority, + i.e. starting with scheme://, need parent dir simplification, + while bare paths or pseudo-URLs starting with proto: without + the double slash do not. + */ + + if (!size) + return AVERROR(ENOMEM); + out = buf; + out_end = buf + size - 1; + + if ((ret = ff_url_decompose(&ub, base, NULL) < 0) || + (ret = ff_url_decompose(&uc, rel, NULL) < 0)) + return ret; + + keep = ub.url; +#define KEEP(component, also) do { \ + if (uc.url_component_end_##component == uc.url && \ + ub.url_component_end_##component > keep) { \ + keep = ub.url_component_end_##component; \ + also \ + } \ + } while (0) + KEEP(scheme, ); + KEEP(authority_full, simplify_path = 1;); + KEEP(path,); + KEEP(query,); + KEEP(fragment,); +#undef KEEP +#define COPY(start, end) do { \ + size_t len = end - start; \ + if (len > out_end - out) \ + return AVERROR(ENOMEM); \ + memcpy(out, start, len); \ + out += len; \ + } while (0) + COPY(ub.url, keep); + COPY(uc.url, uc.path); + if (keep > ub.path) + simplify_path = 0; + + use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path; + if (uc.path > uc.url) + use_base_path = 0; + if (URL_COMPONENT_HAVE(uc, scheme)) + simplify_path = 0; + if (URL_COMPONENT_HAVE(uc, authority)) + simplify_path = 1; + if (URL_COMPONENT_HAVE(uc, path) && uc.path[0] == '/') + use_base_path = 0; + if (use_base_path) { + base_path_end = ub.url_component_end_path; + if (URL_COMPONENT_HAVE(uc, path)) + while (base_path_end > ub.path && base_path_end[-1] != '/') + base_path_end--; } - if (base != buf) - av_strlcpy(buf, base, size); - - /* Strip off any query string from base */ - path_query = strchr(buf, '?'); - if (path_query) - *path_query = '\0'; - - /* Is relative path just a new query part? */ - if (rel[0] == '?') { - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); - return; - } - - root = p = buf; - /* Get the path root of the url which start by "://" */ - if (p && strstr(p, "://")) { - sep = strstr(p, "://"); - if (sep) { - sep += 3; - root = strchr(sep, '/'); - if (!root) - return; - } - } - - /* Remove the file name from the base url */ - sep = strrchr(buf, '/'); - if (sep && sep <= root) - sep = root; - - if (sep) - sep[1] = '\0'; - else - buf[0] = '\0'; - while (av_strstart(rel, "..", NULL) && sep) { - /* Remove the path delimiter at the end */ - if (sep > root) { - sep[0] = '\0'; - sep = strrchr(buf, '/'); + if (simplify_path) { + const char *root = "/"; + COPY(root, root + 1); + path = out; + if (use_base_path) { + ret = append_path(path, out_end, &out, ub.path, base_path_end); + if (ret < 0) + return ret; } - - /* If the next directory name to pop off is "..", break here */ - if (!strcmp(sep ? &sep[1] : buf, "..")) { - /* Readd the slash we just removed */ - av_strlcat(buf, "/", size); - break; + if (URL_COMPONENT_HAVE(uc, path)) { + ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path); + if (ret < 0) + return ret; } - /* Cut off the directory name */ - if (sep) - sep[1] = '\0'; - else - buf[0] = '\0'; - rel += 3; + } else { + if (use_base_path) + COPY(ub.path, base_path_end); + COPY(uc.path, uc.url_component_end_path); } - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); + + COPY(uc.url_component_end_path, uc.end); +#undef COPY + *out = 0; + return 0; } AVIODirEntry *ff_alloc_dir_entry(void) diff --git a/libavformat/url.h b/libavformat/url.h index ae27da5c73..e33edf0cb9 100644 --- a/libavformat/url.h +++ b/libavformat/url.h @@ -312,8 +312,8 @@ int ff_url_join(char *str, int size, const char *proto, * @param base the base url, may be equal to buf. * @param rel the new url, which is interpreted relative to base */ -void ff_make_absolute_url(char *buf, int size, const char *base, - const char *rel); +int ff_make_absolute_url(char *buf, int size, const char *base, + const char *rel); /** * Allocate directory entry with default values. diff --git a/tests/ref/fate/url b/tests/ref/fate/url index 84cf85abdd..7998fb5be9 100644 --- a/tests/ref/fate/url +++ b/tests/ref/fate/url @@ -46,9 +46,9 @@ http://[::1]:8080/dev/null => Testing ff_make_absolute_url: (null) baz => baz /foo/bar baz => /foo/baz - /foo/bar ../baz => /baz + /foo/bar ../baz => /foo/../baz /foo/bar /baz => /baz - /foo/bar ../../../baz => /baz + /foo/bar ../../../baz => /foo/../../../baz http://server/foo/ baz => http://server/foo/baz http://server/foo/bar baz => http://server/foo/baz http://server/foo/ ../baz => http://server/baz @@ -62,6 +62,15 @@ Testing ff_make_absolute_url: http://server/foo/bar /../../../../../other/url => http://server/other/url http://server/foo/bar /test/../../../../../other/url => http://server/other/url http://server/foo/bar /test/../../test/../../../other/url => http://server/other/url + http://server/foo/bar file:../baz/qux => file:../baz/qux + http://server/foo//bar/ ../../ => http://server/foo/ + file:../tmp/foo ../bar/ => file:../tmp/../bar/ + file:../tmp/foo file:../bar/ => file:../bar/ + http://server/foo/bar ./ => http://server/foo/ + http://server/foo/bar .dotfile => http://server/foo/.dotfile + http://server/foo/bar ..doubledotfile => http://server/foo/..doubledotfile + http://server/foo/bar double..dotfile => http://server/foo/double..dotfile + http://server/foo/bar doubledotfile.. => http://server/foo/doubledotfile.. Testing av_url_split: /foo/bar => -1 /foo/bar