diff mbox series

[FFmpeg-devel,v5,1/2] avutil/wchar_filename, file_open: Support long file names on Windows

Message ID 13118dc1faccb2e31f92dd21511b6558a6e9ab3d.1653381808.git.ffmpegagent@gmail.com
State New
Headers show
Series Support long file names on Windows | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Aman Karmani May 24, 2022, 8:43 a.m. UTC
From: softworkz <softworkz@hotmail.com>

Signed-off-by: softworkz <softworkz@hotmail.com>
---
 libavutil/file_open.c      |   2 +-
 libavutil/wchar_filename.h | 166 +++++++++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+), 1 deletion(-)

Comments

Martin Storsjö May 24, 2022, 9:09 a.m. UTC | #1
On Tue, 24 May 2022, softworkz wrote:

> From: softworkz <softworkz@hotmail.com>
>
> Signed-off-by: softworkz <softworkz@hotmail.com>
> ---
> libavutil/file_open.c      |   2 +-
> libavutil/wchar_filename.h | 166 +++++++++++++++++++++++++++++++++++++
> 2 files changed, 167 insertions(+), 1 deletion(-)
>
> diff --git a/libavutil/file_open.c b/libavutil/file_open.c
> index fb64c2e4ee..58a6073353 100644
> --- a/libavutil/file_open.c
> +++ b/libavutil/file_open.c
> @@ -45,7 +45,7 @@ static int win32_open(const char *filename_utf8, int oflag, int pmode)
>     wchar_t *filename_w;
>
>     /* convert UTF-8 to wide chars */
> -    if (utf8towchar(filename_utf8, &filename_w))
> +    if (get_extended_win32_path(filename_utf8, &filename_w))
>         return -1;

Note, the caller expects that if the function returned an error, all 
temporary allocations made by the function have been freed - the caller 
doesn't need to free those allocations.

>     if (!filename_w)
>         goto fallback;
> diff --git a/libavutil/wchar_filename.h b/libavutil/wchar_filename.h
> index 90f082452c..94f8ce54b5 100644
> --- a/libavutil/wchar_filename.h
> +++ b/libavutil/wchar_filename.h
> @@ -40,6 +40,172 @@ static inline int utf8towchar(const char *filename_utf8, wchar_t **filename_w)
>     MultiByteToWideChar(CP_UTF8, 0, filename_utf8, -1, *filename_w, num_chars);
>     return 0;
> }
> +
> +/**
> + * Checks for extended path prefixes for which normalization needs to be skipped.
> + * see .NET6: PathInternal.IsExtended()
> + */
> +static inline int path_is_extended(const wchar_t *path)
> +{
> +    if (path[0] == L'\\' && (path[1] == L'\\' || path[1] == L'?') && path[2] == L'?' && path[3] == L'\\')
> +        return 1;
> +
> +    return 0;
> +}
> +
> +/**
> + * Checks for a device path prefix.
> + * see .NET6: PathInternal.IsDevicePath()
> + */
> +static inline int path_is_device_path(const wchar_t *path)
> +{
> +    if (path[0] == L'\\' && path[1] == L'\\' && path[2] == L'.' && path[3] == L'\\')
> +        return 1;
> +
> +    return 0;
> +}
> +
> +/**
> + * Performs path normalization by calling GetFullPathNameW().
> + * see .NET6: PathHelper.GetFullPathName()
> + */
> +static inline int get_full_path_name(wchar_t **ppath_w)
> +{
> +    int num_chars;
> +    wchar_t *temp_w;
> +
> +    num_chars = GetFullPathNameW(*ppath_w, 0, NULL, NULL);
> +    if (num_chars <= 0) {
> +        errno = EINVAL;
> +        return -1;
> +    }
> +
> +    temp_w = (wchar_t *)av_calloc(num_chars, sizeof(wchar_t));
> +    if (!temp_w) {
> +        errno = ENOMEM;
> +        return -1;
> +    }
> +
> +    num_chars = GetFullPathNameW(*ppath_w, num_chars, temp_w, NULL);
> +    if (num_chars <= 0) {
> +        errno = EINVAL;
> +        return -1;

In this error handling path, you leak the allocated temp_w

> +    }
> +
> +    av_freep(ppath_w);
> +    *ppath_w = temp_w;
> +
> +    return 0;
> +}
> +
> +/**
> + * Normalizes a Windows file or folder path.
> + * Expansion of short paths (with 8.3 path components) is currently omitted
> + * as it is not required for accessing long paths.
> + * see .NET6: PathHelper.Normalize().
> + */
> +static inline int path_normalize(wchar_t **ppath_w)
> +{
> +    int ret;
> +
> +    if ((ret = get_full_path_name(ppath_w)) < 0)
> +        return ret;
> +
> +    /* What .NET does at this point is to call PathHelper.TryExpandShortFileName()
> +     * in case the path contains a '~' character.
> +     * We don't need to do this as we don't need to normalize the file name
> +     * for presentation, and the extended path prefix works with 8.3 path
> +     * components as well
> +     */
> +    return 0;
> +}
> +
> +/**
> + * Adds an extended path or UNC prefix to longs paths or paths ending
> + * with a space or a dot. (' ' or '.').
> + * This function expects that the path has been normalized before by
> + * calling path_normalize() and it doesn't check whether the path is
> + * actually long (> MAX_PATH).
> + * see .NET6: PathInternal.EnsureExtendedPrefix() *
> + */
> +static inline int add_extended_prefix(wchar_t **ppath_w)
> +{
> +    const wchar_t *unc_prefix           = L"\\\\?\\UNC\\";
> +    const wchar_t *extended_path_prefix = L"\\\\?\\";
> +    const wchar_t *path_w               = *ppath_w;
> +    const size_t len                    = wcslen(path_w);
> +    wchar_t *temp_w;
> +
> +    /* We're skipping the check IsPartiallyQualified() because
> +     * we expect to have called GetFullPathNameW() already. */
> +    if (len < 2 || path_is_extended(*ppath_w) || path_is_device_path(*ppath_w)) {
> +        return 0;
> +    }
> +
> +    if (path_w[0] == L'\\' && path_w[1] == L'\\') {
> +        /* unc_prefix length is 8 plus 1 for terminating zeros,
> +         * we subtract 2 for the leading '\\' of the original path */
> +        temp_w = (wchar_t *)av_calloc(len - 2 + 8 + 1, sizeof(wchar_t));
> +        if (!temp_w) {
> +            errno = ENOMEM;
> +            return -1;
> +        }
> +        wcscpy(temp_w, unc_prefix);
> +        wcscat(temp_w, path_w + 2);
> +    } else {
> +        // The length of extended_path_prefix is 4 plus 1 for terminating zeros
> +        temp_w = (wchar_t *)av_calloc(len + 4 + 1, sizeof(wchar_t));
> +        if (!temp_w) {
> +            errno = ENOMEM;
> +            return -1;
> +        }
> +        wcscpy(temp_w, extended_path_prefix);
> +        wcscat(temp_w, path_w);
> +    }
> +
> +    av_freep(ppath_w);
> +    *ppath_w = temp_w;
> +
> +    return 0;
> +}
> +
> +/**
> + * Converts a file or folder path to wchar_t for use with Windows file
> + * APIs. Paths with extended path prefix (either '\\?\' or \??\') are
> + * left unchanged.
> + * All other paths are normalized and converted to absolute paths.
> + * Longs paths (>= MAX_PATH) are prefixed with the extended path or extended
> + * UNC path prefix.
> + * see .NET6: Path.GetFullPath() and Path.GetFullPathInternal()
> + */
> +static inline int get_extended_win32_path(const char *path, wchar_t **ppath_w)
> +{
> +    int ret;
> +    size_t len;
> +
> +    if ((ret = utf8towchar(path, ppath_w)) < 0)
> +        return ret;
> +
> +    if (path_is_extended(*ppath_w)) {
> +        /* Paths prefixed with '\\?\' or \??\' are considered normalized by definition.
> +         * Windows doesn't normalize those paths and neither should we.
> +         */
> +        return 0;
> +    }
> +
> +    if ((ret = path_normalize(ppath_w)) < 0)
> +        return ret;

If we return an error here, we have already allocated an output in 
ppath_w - but the caller won't clean up such allocations on errors. Thus, 
if we have allocated something here (that wasn't allocated before), we 
must take care to clean it up on errors.


Additionally - with all the references to .NET6, could you add a base url 
somewhere in the code, where one can look up those references? For someone 
not familiar with the .NET ecosystem, I'm not entirely sure where I would 
start.

// Martin
diff mbox series

Patch

diff --git a/libavutil/file_open.c b/libavutil/file_open.c
index fb64c2e4ee..58a6073353 100644
--- a/libavutil/file_open.c
+++ b/libavutil/file_open.c
@@ -45,7 +45,7 @@  static int win32_open(const char *filename_utf8, int oflag, int pmode)
     wchar_t *filename_w;
 
     /* convert UTF-8 to wide chars */
-    if (utf8towchar(filename_utf8, &filename_w))
+    if (get_extended_win32_path(filename_utf8, &filename_w))
         return -1;
     if (!filename_w)
         goto fallback;
diff --git a/libavutil/wchar_filename.h b/libavutil/wchar_filename.h
index 90f082452c..94f8ce54b5 100644
--- a/libavutil/wchar_filename.h
+++ b/libavutil/wchar_filename.h
@@ -40,6 +40,172 @@  static inline int utf8towchar(const char *filename_utf8, wchar_t **filename_w)
     MultiByteToWideChar(CP_UTF8, 0, filename_utf8, -1, *filename_w, num_chars);
     return 0;
 }
+
+/**
+ * Tests whether a path starts with an extended path prefix, i.e. either
+ * '\\?\' or '\??\'. Such paths must not be normalized.
+ * see .NET6: PathInternal.IsExtended()
+ *
+ * @param path zero-terminated wide-char path
+ * @return 1 if the path carries one of the prefixes, 0 otherwise
+ */
+static inline int path_is_extended(const wchar_t *path)
+{
+    /* The && chain short-circuits, so no character past the terminating
+     * zero of a short string is ever read. */
+    return path[0] == L'\\' &&
+           (path[1] == L'\\' || path[1] == L'?') &&
+           path[2] == L'?' &&
+           path[3] == L'\\';
+}
+
+/**
+ * Tests whether a path starts with the device path prefix '\\.\'.
+ * see .NET6: PathInternal.IsDevicePath()
+ *
+ * @param path zero-terminated wide-char path
+ * @return 1 if the path carries the prefix, 0 otherwise
+ */
+static inline int path_is_device_path(const wchar_t *path)
+{
+    /* The && chain short-circuits, so no character past the terminating
+     * zero of a short string is ever read. */
+    return path[0] == L'\\' &&
+           path[1] == L'\\' &&
+           path[2] == L'.'  &&
+           path[3] == L'\\';
+}
+
+/**
+ * Performs path normalization by calling GetFullPathNameW().
+ * see .NET6: PathHelper.GetFullPathName()
+ *
+ * @param ppath_w in/out: points to the path buffer. On success the old
+ *                buffer is released with av_freep() and replaced by a newly
+ *                allocated buffer holding the result. On failure *ppath_w
+ *                is left untouched and no temporary allocation is leaked.
+ * @return 0 on success, -1 on failure with errno set to EINVAL or ENOMEM
+ */
+static inline int get_full_path_name(wchar_t **ppath_w)
+{
+    int buf_chars, num_chars;
+    wchar_t *temp_w;
+
+    /* With a zero-sized buffer the call reports the required buffer size
+     * (in wide chars, including the terminating zero). */
+    buf_chars = GetFullPathNameW(*ppath_w, 0, NULL, NULL);
+    if (buf_chars <= 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    temp_w = (wchar_t *)av_calloc(buf_chars, sizeof(wchar_t));
+    if (!temp_w) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    num_chars = GetFullPathNameW(*ppath_w, buf_chars, temp_w, NULL);
+    /* On success the result is the string length without the terminating
+     * zero, hence < buf_chars. A value >= buf_chars means the buffer turned
+     * out to be too small (e.g. the current directory changed between the
+     * two calls) and temp_w does not hold a valid path. */
+    if (num_chars <= 0 || num_chars >= buf_chars) {
+        av_freep(&temp_w);
+        errno = EINVAL;
+        return -1;
+    }
+
+    av_freep(ppath_w);
+    *ppath_w = temp_w;
+
+    return 0;
+}
+
+/**
+ * Normalizes a Windows file or folder path.
+ * Short paths (with 8.3 path components) are not expanded, because that
+ * is not required for accessing long paths.
+ * see .NET6: PathHelper.Normalize().
+ *
+ * @param ppath_w in/out: path buffer, handed on to get_full_path_name()
+ * @return 0 on success, the negative result of get_full_path_name() otherwise
+ */
+static inline int path_normalize(wchar_t **ppath_w)
+{
+    const int status = get_full_path_name(ppath_w);
+
+    /* .NET would now call PathHelper.TryExpandShortFileName() if the path
+     * contains a '~' character.
+     * That step is skipped here: the file name does not have to be
+     * normalized for presentation, and the extended path prefix works
+     * with 8.3 path components as well.
+     */
+    return status < 0 ? status : 0;
+}
+
+/**
+ * Adds an extended path or extended UNC prefix to a path.
+ * Intended for long paths or paths ending with a space or a dot
+ * (' ' or '.').
+ * The path is expected to have been normalized by path_normalize()
+ * beforehand; whether it is actually long (> MAX_PATH) is not checked here.
+ * Paths shorter than 2 characters and paths that already carry an extended
+ * or device prefix are left unchanged.
+ * see .NET6: PathInternal.EnsureExtendedPrefix()
+ *
+ * @param ppath_w in/out: path buffer; when a prefix is added, the old buffer
+ *                is released with av_freep() and replaced by a new one
+ * @return 0 on success, -1 with errno set to ENOMEM on allocation failure
+ */
+static inline int add_extended_prefix(wchar_t **ppath_w)
+{
+    const wchar_t *src   = *ppath_w;
+    const size_t src_len = wcslen(src);
+    const wchar_t *prefix;
+    size_t prefix_len, skip;
+    wchar_t *prefixed;
+
+    /* The IsPartiallyQualified() check is skipped because
+     * GetFullPathNameW() is expected to have been called already. */
+    if (src_len < 2 || path_is_extended(src) || path_is_device_path(src))
+        return 0;
+
+    if (src[0] == L'\\' && src[1] == L'\\') {
+        /* UNC path: the leading '\\' of the original path is dropped and
+         * replaced by the 8 character extended UNC prefix. */
+        prefix     = L"\\\\?\\UNC\\";
+        prefix_len = 8;
+        skip       = 2;
+    } else {
+        /* Any other path: the 4 character extended prefix is prepended. */
+        prefix     = L"\\\\?\\";
+        prefix_len = 4;
+        skip       = 0;
+    }
+
+    /* One extra element for the terminating zero. */
+    prefixed = (wchar_t *)av_calloc(src_len - skip + prefix_len + 1, sizeof(wchar_t));
+    if (!prefixed) {
+        errno = ENOMEM;
+        return -1;
+    }
+    wcscpy(prefixed, prefix);
+    wcscat(prefixed, src + skip);
+
+    av_freep(ppath_w);
+    *ppath_w = prefixed;
+
+    return 0;
+}
+
+/**
+ * Converts a file or folder path to wchar_t for use with Windows file
+ * APIs. Paths with an extended path prefix (either '\\?\' or '\??\') are
+ * left unchanged.
+ * All other paths are normalized and converted to absolute paths.
+ * Long paths (>= MAX_PATH) are prefixed with the extended path or extended
+ * UNC path prefix.
+ * see .NET6: Path.GetFullPath() and Path.GetFullPathInternal()
+ *
+ * @param path    input path, passed to utf8towchar() for conversion
+ * @param ppath_w out: receives the newly allocated wide-char path. On
+ *                failure everything allocated here is freed again and
+ *                *ppath_w is set to NULL, so the caller has nothing to
+ *                clean up.
+ * @return 0 on success, a negative value on failure
+ */
+static inline int get_extended_win32_path(const char *path, wchar_t **ppath_w)
+{
+    int ret;
+    size_t len;
+
+    if ((ret = utf8towchar(path, ppath_w)) < 0)
+        return ret;
+
+    /* win32_open() checks for a NULL result after a successful return, so
+     * presumably utf8towchar() can succeed while leaving *ppath_w NULL.
+     * Pass that through instead of dereferencing it below. */
+    if (!*ppath_w)
+        return 0;
+
+    if (path_is_extended(*ppath_w)) {
+        /* Paths prefixed with '\\?\' or '\??\' are considered normalized by definition.
+         * Windows doesn't normalize those paths and neither should we.
+         */
+        return 0;
+    }
+
+    if ((ret = path_normalize(ppath_w)) < 0) {
+        /* The caller does not free on error, so release the buffer here. */
+        av_freep(ppath_w);
+        return ret;
+    }
+
+    // see .NET6: PathInternal.EnsureExtendedPrefixIfNeeded()
+    len = wcslen(*ppath_w);
+    if (len >= MAX_PATH) {
+        if ((ret = add_extended_prefix(ppath_w)) < 0) {
+            av_freep(ppath_w);
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
 #endif
 
 #endif /* AVUTIL_WCHAR_FILENAME_H */