diff mbox series

[FFmpeg-devel,1/2] avutil/wchar_filename, file_open: Support long file names on Windows

Message ID 26c579e4ee0065a81f46b18adeba1b97385d8257.1652435595.git.ffmpegagent@gmail.com
State New
Headers show
Series Support long file names on Windows | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Aman Karmani May 13, 2022, 9:53 a.m. UTC
From: softworkz <softworkz@hotmail.com>

Signed-off-by: softworkz <softworkz@hotmail.com>
---
 libavutil/file_open.c      |   2 +-
 libavutil/wchar_filename.h | 123 +++++++++++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+), 1 deletion(-)

Comments

nihil-admirari May 15, 2022, 7:02 p.m. UTC | #1
> diff --git a/libavutil/wchar_filename.h b/libavutil/wchar_filename.h
> ...
> +static inline int path_is_extended(const wchar_t *path)
> ...

Why did the path handling functions end up in wchar_filename.h?
Isn't it better to move them to file_open or os_support?

> +    num_chars = GetFullPathNameW(*ppath_w, num_chars, temp_w, NULL);

It turns out that GetFullPathNameW handles long path names without the manifest
or a \\?\ prefix. Other WinAPI functions require either a prefix or a manifest,
which is why I thought that path normalisation must be done by hand.

> +static inline int path_normalize(wchar_t **ppath_w)
> +{
> +    int ret;
> +
> +    // see .NET6: PathHelper.Normalize()
> +    if ((ret = get_full_path_name(ppath_w)) < 0)
> +        return ret;
> +
> +    /* What .NET does at this point is to call PathHelper.TryExpandShortFileName()
> +       in case the path contains a '~' character.
> +       We don't need to do this as we don't need to normalize the file name
> +       for presentation, and the extended path prefix works with 8.3 path
> +       components as well */
> +    return 0;
> +}

This function simply forwards the return code of get_full_path_name().
The only non-trivial part of it is a comment.

> +static inline int path_is_extended(const wchar_t *path)
> +{
> +    // see .NET6: PathInternal.IsExtended()
> +    size_t len = wcslen(path);
> +    if (len >= 4  && path[0] == L'\\' && (path[1] == L'\\' || path[1] == L'?') && path[2] == L'?' && path[3] == L'\\')
> +        return 1;
> +
> +    return 0;
> +}
>
> +static inline int add_extended_prefix(wchar_t **ppath_w)
> +{
> +    const wchar_t *unc_prefix           = L"\\\\?\\UNC\\";
> ...
> +    // see .NET6: PathInternal.EnsureExtendedPrefix()
> +    if (path_w[0] == L'\\' && path_w[1] == L'\\') {
> ...
> +        wcscpy(temp_w, unc_prefix);
> +        wcscat(temp_w, path_w + 2);
> 
> +static inline int get_extended_win32_path(const char *path, wchar_t **ppath_w)
> +{
> ...
> +    if (path_is_extended(*ppath_w)) {
> +        ...
> +        return 0;
> +    }
> ...
> +        if ((ret = add_extended_prefix(ppath_w)) < 0)

Actual PathInternal.EnsureExtendedPrefix
(https://github.com/dotnet/runtime/blob/main/src/libraries/Common/src/System/IO/PathInternal.Windows.cs)
checks for

            if (IsPartiallyQualified(path.AsSpan()) || IsDevice(path.AsSpan()))
                return path;

where IsDevice handles \\.\, which you do not handle. If I'm not mistaken,
the code paths presented above will turn such paths into \\?\UNC\\.\,
which is an error.

> +static inline int add_extended_prefix(wchar_t **ppath_w)
> +{
> +    const wchar_t *unc_prefix           = L"\\\\?\\UNC\\";
> ...
> +        temp_w = (wchar_t *)av_calloc(len + 6 + 1, sizeof(wchar_t));

Wouldn't it be better to use sizeof unc_prefix instead of magic numbers?
Soft Works May 15, 2022, 8:24 p.m. UTC | #2
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of nil-
> admirari@mailo.com
> Sent: Sunday, May 15, 2022 9:03 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH 1/2] avutil/wchar_filename,
> file_open: Support long file names on Windows
> 
> > diff --git a/libavutil/wchar_filename.h b/libavutil/wchar_filename.h
> > ...
> > +static inline int path_is_extended(const wchar_t *path)
> > ...
> 
> Why path handling functions ended up in wchar_filename.h?
> Isn't it better to move them to file_open or os_support?

The functions are needed in both. file_open.c cannot be included
in libavformat/os_support.h, nor the other way round,
so they need to be in a third place. How about renaming
wchar_filename.h to windows_filename.h?


> > +    num_chars = GetFullPathNameW(*ppath_w, num_chars, temp_w,
> NULL);
> 
> Turns out that GetFullPathNameW handles long path names without the
> manifest
> or a prefix \\?\. Other WinAPI functions, require either a prefix or a
> manifest,
> which is why I thought that path normalisation must be done by hand.

Yea, that's where we are lucky.


> > +static inline int path_normalize(wchar_t **ppath_w)
> > +{
> > +    int ret;
> > +
> > +    // see .NET6: PathHelper.Normalize()
> > +    if ((ret = get_full_path_name(ppath_w)) < 0)
> > +        return ret;
> > +
> > +    /* What .NET does at this point is to call
> PathHelper.TryExpandShortFileName()
> > +       in case the path contains a '~' character.
> > +       We don't need to do this as we don't need to normalize the
> file name
> > +       for presentation, and the extended path prefix works with
> 8.3 path
> > +       components as well */
> > +    return 0;
> > +}
> 
> This function simply forwards the return code of get_full_path_name().
> The only non-trivial part of it is a comment.

I wanted those functions to resemble the corresponding code path in
.NET.

I had already started the implementation of TryExpandShortFileName()
as well, but then I figured that this isn't necessary. To check this,
I tested a long path made up of 8.3 path components (where
even the 8.3 form is longer than 260), and even those paths work
with the extended prefix. That's why I skipped this part: in our
case the result will only be used internally, while in .NET it is done
because the GetFullPath() method is also used for other purposes
where expansion is desirable.

Of course we could merge some of those functions together, but the
compiler will do the inlining anyway, that's why I chose to keep
the functions separate for better clarity.

 
> > +static inline int path_is_extended(const wchar_t *path)
> > +{
> > +    // see .NET6: PathInternal.IsExtended()
> > +    size_t len = wcslen(path);
> > +    if (len >= 4  && path[0] == L'\\' && (path[1] == L'\\' ||
> path[1] == L'?') && path[2] == L'?' && path[3] == L'\\')
> > +        return 1;
> > +
> > +    return 0;
> > +}
> >
> > +static inline int add_extended_prefix(wchar_t **ppath_w)
> > +{
> > +    const wchar_t *unc_prefix           = L"\\\\?\\UNC\\";
> > ...
> > +    // see .NET6: PathInternal.EnsureExtendedPrefix()
> > +    if (path_w[0] == L'\\' && path_w[1] == L'\\') {
> > ...
> > +        wcscpy(temp_w, unc_prefix);
> > +        wcscat(temp_w, path_w + 2);
> >
> > +static inline int get_extended_win32_path(const char *path, wchar_t
> **ppath_w)
> > +{
> > ...
> > +    if (path_is_extended(*ppath_w)) {
> > +        ...
> > +        return 0;
> > +    }
> > ...
> > +        if ((ret = add_extended_prefix(ppath_w)) < 0)
> 
> Actual PathInternal.EnsureExtendedPrefix
> (https://github.com/dotnet/runtime/blob/main/src/libraries/Common/src/
> System/IO/PathInternal.Windows.cs)
> checks for
> 
>             if (IsPartiallyQualified(path.AsSpan()) ||
> IsDevice(path.AsSpan()))
>                 return path;


> where IsDevice handles \\.\, which you do not handle. If I'm not
> mistaken,
> the code paths presented above will turn such paths into \\?\UNC\\.\,
> which is an error.

I have skipped those checks because we won't have partially qualified
paths at this point (due to having called GetFullPathNameW), and
device paths are not allowed to be longer than 260. So it might
happen that the UNC prefix gets added to a device path, but only when
it's a long path, which doesn't work anyway (I've tested those cases).

> > +static inline int add_extended_prefix(wchar_t **ppath_w)
> > +{
> > +    const wchar_t *unc_prefix           = L"\\\\?\\UNC\\";
> > ...
> > +        temp_w = (wchar_t *)av_calloc(len + 6 + 1,
> sizeof(wchar_t));
> 
> Wouldn't it be better to use sizeof unc_prefix instead of magic
> numbers?

Then we would need to subtract the terminating zeros and divide
by two => ugly.
Or do another wcslen() call => unnecessary. 

I have added comments right above now, explaining those numbers.

Thanks for your review,
softworkz
nihil-admirari May 16, 2022, 8:34 a.m. UTC | #3
> The functions are needed in both. file_open.c cannot be included
> in libavformat/os_support.h and neither the other way round, 
> so they need to be in a 3rd place. How about renaming
> wchar_filename.h to windows_filename.h ?

Probably it's better to rename.

> I have skipped those checks because we won't have partially qualified
> paths at this point (due to having called GetFullPathNameW) and
> device paths are not allowed to be longer than 260, so this it might
> happen that the UNC prefix gets added, but only when it's a long
> path which doesn't work anyway (I've tested those cases).

I think it's better to test for \\.\ explicitly in path_is_extended:
1. It's not obvious that \\.\ paths aren't allowed to be long.
2. FFmpeg itself is probably not going to have a longPathAware manifest,
   but it can be linked into an EXE that has such a manifest.
   Would the MAX_PATH restriction still apply then?

You have the checks inside of get_extended_win32_path and none
inside of add_extended_prefix. Yet add_extended_prefix can be called
by anyone: it's not private. Thus add_extended_prefix should either be inlined
or have the necessary checks in place. Otherwise you end up with
an API that's easy to use incorrectly and hard to use correctly, and it should be
the other way around.
diff mbox series

Patch

diff --git a/libavutil/file_open.c b/libavutil/file_open.c
index cc302f2f76..57c5e78d51 100644
--- a/libavutil/file_open.c
+++ b/libavutil/file_open.c
@@ -45,7 +45,7 @@  static int win32_open(const char *filename_utf8, int oflag, int pmode)
     wchar_t *filename_w;
 
     /* convert UTF-8 to wide chars */
-    if (utf8towchar(filename_utf8, &filename_w))
+    if (get_extended_win32_path(filename_utf8, &filename_w))
         return -1;
     if (!filename_w)
         goto fallback;
diff --git a/libavutil/wchar_filename.h b/libavutil/wchar_filename.h
index 90f082452c..94b4087de0 100644
--- a/libavutil/wchar_filename.h
+++ b/libavutil/wchar_filename.h
@@ -40,6 +40,129 @@  static inline int utf8towchar(const char *filename_utf8, wchar_t **filename_w)
     MultiByteToWideChar(CP_UTF8, 0, filename_utf8, -1, *filename_w, num_chars);
     return 0;
 }
+
+static inline int path_is_extended(const wchar_t *path)
+{
+    // see .NET6: PathInternal.IsExtended()
+    size_t len = wcslen(path);
+    if (len >= 4  && path[0] == L'\\' && (path[1] == L'\\' || path[1] == L'?') && path[2] == L'?' && path[3] == L'\\')
+        return 1;
+
+    return 0;
+}
+
+static inline int get_full_path_name(wchar_t **ppath_w)
+{
+    int num_chars;
+    wchar_t *temp_w;
+
+    // see .NET6: PathHelper.GetFullPathName()
+    num_chars = GetFullPathNameW(*ppath_w, 0, NULL, NULL);
+    if (num_chars <= 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    temp_w = (wchar_t *)av_calloc(num_chars, sizeof(wchar_t));
+    if (!temp_w) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    num_chars = GetFullPathNameW(*ppath_w, num_chars, temp_w, NULL);
+    if (num_chars <= 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    av_freep(ppath_w);
+    *ppath_w = temp_w;
+
+    return 0;
+}
+
+static inline int path_normalize(wchar_t **ppath_w)
+{
+    int ret;
+
+    // see .NET6: PathHelper.Normalize()
+    if ((ret = get_full_path_name(ppath_w)) < 0)
+        return ret;
+
+    /* What .NET does at this point is to call PathHelper.TryExpandShortFileName()
+       in case the path contains a '~' character.
+       We don't need to do this as we don't need to normalize the file name
+       for presentation, and the extended path prefix works with 8.3 path
+       components as well */
+    return 0;
+}
+
+static inline int add_extended_prefix(wchar_t **ppath_w)
+{
+    const wchar_t *unc_prefix           = L"\\\\?\\UNC\\";
+    const wchar_t *extended_path_prefix = L"\\\\?\\";
+    const wchar_t *path_w               = *ppath_w;
+    const size_t len                    = wcslen(path_w);
+    wchar_t *temp_w;
+
+    if (len < 2)
+        return 0;
+
+    // see .NET6: PathInternal.EnsureExtendedPrefix()
+    if (path_w[0] == L'\\' && path_w[1] == L'\\') {
+        temp_w = (wchar_t *)av_calloc(len + 6 + 1, sizeof(wchar_t));
+        if (!temp_w) {
+            errno = ENOMEM;
+            return -1;
+        }
+        wcscpy(temp_w, unc_prefix);
+        wcscat(temp_w, path_w + 2);
+    } else {
+        temp_w = (wchar_t *)av_calloc(len + 4 + 1, sizeof(wchar_t));
+        if (!temp_w) {
+            errno = ENOMEM;
+            return -1;
+        }
+        wcscpy(temp_w, extended_path_prefix);
+        wcscat(temp_w, path_w);
+    }
+
+    av_freep(ppath_w);
+    *ppath_w = temp_w;
+
+    return 0;
+}
+
+static inline int get_extended_win32_path(const char *path, wchar_t **ppath_w)
+{
+    int ret;
+    size_t len;
+
+    // see .NET6: Path.GetFullPath() and Path.GetFullPathInternal()
+    if ((ret = utf8towchar(path, ppath_w)) < 0)
+        return ret;
+
+    if (path_is_extended(*ppath_w)) {
+        /* \\?\ paths are considered normalized by definition. Windows doesn't normalize \\?\
+           paths and neither should we. Even if we wanted to, GetFullPathName does not work
+           properly with device paths. If one wants to pass a \\?\ path through normalization
+           one can chop off the prefix, pass it to GetFullPath and add it again. */
+        return 0;
+    }
+
+    if ((ret = path_normalize(ppath_w)) < 0)
+        return ret;
+
+    // see .NET6: PathInternal.EnsureExtendedPrefixIfNeeded()
+    len = wcslen(*ppath_w);
+    if (len >= 260 || (*ppath_w)[len - 1] == L' ' || (*ppath_w)[len - 1] == L'.') {
+        if ((ret = add_extended_prefix(ppath_w)) < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
 #endif
 
 #endif /* AVUTIL_WCHAR_FILENAME_H */