diff mbox series

[FFmpeg-devel,v7,2/3] avformat/os_support: Support long file names on Windows

Message ID 7751335906bcaa5da249928c9830fcc75b01970c.1653430851.git.ffmpegagent@gmail.com
State New
Headers show
Series Support long file names on Windows | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

softworkz May 24, 2022, 10:20 p.m. UTC
From: softworkz <softworkz@hotmail.com>

Signed-off-by: softworkz <softworkz@hotmail.com>
---
 libavformat/os_support.h | 106 ++++++++++++++++++++++++++++++---------
 1 file changed, 81 insertions(+), 25 deletions(-)

Comments

Nil Admirari May 25, 2022, 2:47 p.m. UTC | #1
> + struct win32_stat
> + {
> + _dev_t st_dev; /* ID of device containing file */
> + _ino_t st_ino; /* inode number */
> + unsigned short st_mode; /* protection */
> + short st_nlink; /* number of hard links */
> + short st_uid; /* user ID of owner */
> + short st_gid; /* group ID of owner */
> + _dev_t st_rdev; /* device ID (if special file) */
> + int64_t st_size; /* total size, in bytes */
> + int64_t st_atime; /* time of last access */
> + int64_t st_mtime; /* time of last modification */
> + int64_t st_ctime; /* time of last status change */
> + };

Wouldn't it make sense to add a
static_assert(sizeof(struct win32_stat) == sizeof(struct _stati64))
somewhere?

> +static inline int win32_access(const char *filename_utf8, int par)
> +static inline void copy_stat(struct _stati64 *winstat, struct win32_stat *par)
> +static inline int win32_stat(const char *filename_utf8, struct win32_stat *par)
> +static inline int win32_fstat(int fd, struct win32_stat *par)

How about renaming par to something more appropriate?

> +static inline void copy_stat(struct _stati64 *winstat, struct win32_stat *par)
> +{
> + par->st_dev = winstat->st_dev;
> + par->st_ino = winstat->st_ino;
> + par->st_mode = winstat->st_mode;
> + par->st_nlink = winstat->st_nlink;
> + par->st_uid = winstat->st_uid;
> + par->st_gid = winstat->st_gid;
> + par->st_rdev = winstat->st_rdev;
> + par->st_size = winstat->st_size;
> + par->st_atime = winstat->st_atime;
> + par->st_mtime = winstat->st_mtime;
> + par->st_ctime = winstat->st_ctime;
> }

Would memcpy make more sense here?

> +static inline int win32_stat(const char *filename_utf8, struct win32_stat *par)
> +{
> + struct _stati64 winstat = { 0 };
> ...
> 
> +static inline int win32_fstat(int fd, struct win32_stat *par)
> +{
> + struct _stati64 winstat = { 0 };
> ...

Functions use _stati64 internally, which is affected by _USE_32BIT_TIME_T.
win32_stat does not take _USE_32BIT_TIME_T into account at all.
It was already pointed out that _USE_32BIT_TIME_T is actually used:
https://ffmpeg.org/pipermail/ffmpeg-devel/2022-May/296731.html.
Soft Works May 25, 2022, 3:28 p.m. UTC | #2
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of nil-
> admirari@mailo.com
> Sent: Wednesday, May 25, 2022 4:48 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v7 2/3] avformat/os_support: Support
> long file names on Windows
> 
> > + struct win32_stat
> > + {
> > + _dev_t st_dev; /* ID of device containing file */
> > + _ino_t st_ino; /* inode number */
> > + unsigned short st_mode; /* protection */
> > + short st_nlink; /* number of hard links */
> > + short st_uid; /* user ID of owner */
> > + short st_gid; /* group ID of owner */
> > + _dev_t st_rdev; /* device ID (if special file) */
> > + int64_t st_size; /* total size, in bytes */
> > + int64_t st_atime; /* time of last access */
> > + int64_t st_mtime; /* time of last modification */
> > + int64_t st_ctime; /* time of last status change */
> > + };
> 
> Wouldn't it make sense to add a
> static_assert(sizeof(struct win32_stat) == sizeof(struct _stati64))
> somewhere?

No, it is intended and expected that the structs are different.

> 
> > +static inline int win32_access(const char *filename_utf8, int par)
> > +static inline void copy_stat(struct _stati64 *winstat, struct
> win32_stat *par)
> > +static inline int win32_stat(const char *filename_utf8, struct
> win32_stat *par)
> > +static inline int win32_fstat(int fd, struct win32_stat *par)
> 
> How about renaming par to something more appropriate?

How? And why?

These functions were always named like that (it just wasn't visible
as these were constructed through macros).

> 
> > +static inline void copy_stat(struct _stati64 *winstat, struct
> win32_stat *par)
> > +{
> > + par->st_dev = winstat->st_dev;
> > + par->st_ino = winstat->st_ino;
> > + par->st_mode = winstat->st_mode;
> > + par->st_nlink = winstat->st_nlink;
> > + par->st_uid = winstat->st_uid;
> > + par->st_gid = winstat->st_gid;
> > + par->st_rdev = winstat->st_rdev;
> > + par->st_size = winstat->st_size;
> > + par->st_atime = winstat->st_atime;
> > + par->st_mtime = winstat->st_mtime;
> > + par->st_ctime = winstat->st_ctime;
> > }
> 
> Would memcpy make more sense here?

No, it is intended and expected that the structs are different.

> 
> > +static inline int win32_stat(const char *filename_utf8, struct
> win32_stat *par)
> > +{
> > + struct _stati64 winstat = { 0 };
> > ...
> >
> > +static inline int win32_fstat(int fd, struct win32_stat *par)
> > +{
> > + struct _stati64 winstat = { 0 };
> > ...
> 
> Functions use _stati64 internally, which is affected by _USE_32BIT_TIME_T.
> win32_stat does not take _USE_32BIT_TIME_T into account at all.
> It was already pointed out that _USE_32BIT_TIME_T is actually used:
> https://ffmpeg.org/pipermail/ffmpeg-devel/2022-May/296731.html.

That's why the structs are different and the fields of
win32_stat always large enough, no matter which struct 
is being used internally.

The point of this solution is to provide independence 
between what the application "sees" and what is used 
to call the Windows API.

Kind regards,
softworkz
Martin Storsjö May 25, 2022, 6:50 p.m. UTC | #3
On Wed, 25 May 2022, nil-admirari@mailo.com wrote:

>> + struct win32_stat
>> + {
>> + _dev_t st_dev; /* ID of device containing file */
>> + _ino_t st_ino; /* inode number */
>> + unsigned short st_mode; /* protection */
>> + short st_nlink; /* number of hard links */
>> + short st_uid; /* user ID of owner */
>> + short st_gid; /* group ID of owner */
>> + _dev_t st_rdev; /* device ID (if special file) */
>> + int64_t st_size; /* total size, in bytes */
>> + int64_t st_atime; /* time of last access */
>> + int64_t st_mtime; /* time of last modification */
>> + int64_t st_ctime; /* time of last status change */
>> + };
>
> Wouldn't it make sense to add a
> static_assert(sizeof(struct win32_stat) == sizeof(struct _stati64))
> somewhere?
>
>> +static inline void copy_stat(struct _stati64 *winstat, struct win32_stat *par)
>> +{
>> + par->st_dev = winstat->st_dev;
>> + par->st_ino = winstat->st_ino;
>> + par->st_mode = winstat->st_mode;
>> + par->st_nlink = winstat->st_nlink;
>> + par->st_uid = winstat->st_uid;
>> + par->st_gid = winstat->st_gid;
>> + par->st_rdev = winstat->st_rdev;
>> + par->st_size = winstat->st_size;
>> + par->st_atime = winstat->st_atime;
>> + par->st_mtime = winstat->st_mtime;
>> + par->st_ctime = winstat->st_ctime;
>> }
>
> Would memcpy make more sense here?

As explained elsewhere too, the explicit intent is that this is a 
different struct than the real _stati64 or whichever happens to be used, 
not necessarily identical.

We don't know the exact layout of the real stat struct (and technically, 
different C runtimes, e.g. msvcrt.dll, msvcr100.dll, msvcr120.dll, UCRT, 
could all have different layouts/sizes), and it's brittle to try to 
guess/mimic it. So instead of trying to mimic it, we just make our own 
(which is what ends up used in the calling libavformat code) - our wrapper 
then explicitly uses one from the C runtime (which we don't know the 
size/layout of), and we just copy it field by field into the one we expose 
to the caller.

This could use any random layout, as long as it contains the subset of 
fields from stat that we actually use anywhere.

And if we wanted a static assert, the only relevant assert would be to 
make sure that our wrapper struct's fields are as large as, or larger, 
than the ones that the original stat returns.

// Martin
Nil Admirari May 25, 2022, 7:17 p.m. UTC | #4
> No, it is intended and expected that the structs are different.
> ...
> That's why the structs are different and the fields of
> win32_stat always large enough, no matter which struct 
> is being used internally.

Please document that there is a potential difference in time types
and that the difference is intentional, and that the chosen
time type is always large enough.

Probably it's worthwhile to document that the entire machinery was created
because of POSIX stat function and struct being identically named,
which is not possible to accommodate by a simple macro.

> > > +static inline int win32_access(const char *filename_utf8, int par)
> > > +static inline void copy_stat(struct _stati64 *winstat, struct
> > win32_stat *par)
> > > +static inline int win32_stat(const char *filename_utf8, struct
> > win32_stat *par)
> > > +static inline int win32_fstat(int fd, struct win32_stat *par)
> > 
> > How about renaming par to something more appropriate?
> 
> How? And why?
>
> These functions were always named like that (it just wasn't visible
> as these were constructed through macros).

_access argument is called mode:
https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/access-waccess?view=msvc-170.

_stat and _fstat argument is called a buffer:
https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/stat-functions?view=msvc-170
https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/fstat-fstat32-fstat64-fstati64-fstat32i64-fstat64i32?view=msvc-170
It's no better than par, but you don't have to follow MS.

Somehow winstat refers to parameters of type _stati64, not win32_stat.
I would've called win32_stat params winstat, and _stati64 params crtstat,
but you can always come up with better names.

Inside a macro generic parameter names are a necessity. Functions better be more specific.
Soft Works May 26, 2022, 5:09 a.m. UTC | #5
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of nil-
> admirari@mailo.com
> Sent: Wednesday, May 25, 2022 9:17 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v7 2/3] avformat/os_support: Support
> long file names on Windows
> 
> > No, it is intended and expected that the structs are different.
> > ...
> > That's why the structs are different and the fields of
> > win32_stat always large enough, no matter which struct
> > is being used internally.
> 
> Please document that there is a potential difference in time types
> and that the difference is intentional, and that the chosen
> time type is always large enough.
> 
> Probably it's worthwhile to document that the entire machinery was created
> because of POSIX stat function and struct being identically named,
> which is not possible to accommodate by a simple macro.

Yes, it makes sense to explain that a bit. Will do.


> > > > +static inline int win32_access(const char *filename_utf8, int par)
> > > > +static inline void copy_stat(struct _stati64 *winstat, struct
> > > win32_stat *par)
> > > > +static inline int win32_stat(const char *filename_utf8, struct
> > > win32_stat *par)
> > > > +static inline int win32_fstat(int fd, struct win32_stat *par)
> > >
> > > How about renaming par to something more appropriate?
> >
> > How? And why?
> >
> > These functions were always named like that (it just wasn't visible
> > as these were constructed through macros).
> 
> _access argument is called mode:
> https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/access-
> waccess?view=msvc-170.
> _stat and _fstat argument is called a buffer:
> https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/stat-
> functions?view=msvc-170
> https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/fstat-
> fstat32-fstat64-fstati64-fstat32i64-fstat64i32?view=msvc-170
> It's no better than par, but you don't have to follow MS.

I misread, thinking you wanted to rename the functions. Yes, the par
comes from the macro (which was for building the two functions with 
different parameter name). I have expanded the macro for stat, 
and then the macro would have only existed for access, so I did 
expand that one as well.

So, no problem, I will rename the arguments according to posix.

> Somehow winstat refers to parameters of type _stati64, not win32_stat.

I named it winstat for "win api stat". When you want to go strict
you could say that all the naming is incorrect, because 'win32' 
suggests that all those are about calls to the Win32 API, while
in fact, they are calls to msvcrt.

> I would've called win32_stat params winstat, and _stati64 params crtstat,
> but you can always come up with better names.

win32_stat has nothing to do with Windows, it is actually more a posix_stat,
but it needs to have the same name as the function

Even crtstat would not be quite correct and would rather need to 
be named msvcrtstat :-)

We could drive this further and further and probably it would never
be totally "right". Though, I will make the replacements you asked for,
and then we'll see...


Regarding your concerns about _USE_32BIT_TIME_T, I wanted to mention
that we still have an alternative, to get around this.

Currently, we are using _wstati64, _ffstati64 and _stati64. All of 
those are re-defined depending on whether _USE_32BIT_TIME_T is
defined or not.

Instead of that, we could use _wstat64, _ffstat64 and _stat64. In 
this case, we would always get 64bit time values, independent
of the definition of _USE_32BIT_TIME_T.

The only difference it makes would be whether we can have file times
beyond the year 2038.

I'm fine with the way it is right now, I just wanted to have mentioned
it.

Thanks for reviewing,
softworkz
diff mbox series

Patch

diff --git a/libavformat/os_support.h b/libavformat/os_support.h
index 5e6b32d2dc..1c3b234b06 100644
--- a/libavformat/os_support.h
+++ b/libavformat/os_support.h
@@ -49,11 +49,28 @@ 
 #  ifdef stat
 #   undef stat
 #  endif
-#  define stat _stati64
+
+#  define stat win32_stat
+
+    struct win32_stat
+    {
+        _dev_t         st_dev;     /* ID of device containing file */
+        _ino_t         st_ino;     /* inode number */
+        unsigned short st_mode;    /* protection */
+        short          st_nlink;   /* number of hard links */
+        short          st_uid;     /* user ID of owner */
+        short          st_gid;     /* group ID of owner */
+        _dev_t         st_rdev;    /* device ID (if special file) */
+        int64_t        st_size;    /* total size, in bytes */
+        int64_t        st_atime;   /* time of last access */
+        int64_t        st_mtime;   /* time of last modification */
+        int64_t        st_ctime;   /* time of last status change */
+    };
+
 #  ifdef fstat
 #   undef fstat
 #  endif
-#  define fstat(f,s) _fstati64((f), (s))
+#  define fstat win32_fstat
 #endif /* defined(_WIN32) */
 
 
@@ -153,7 +170,7 @@  static inline int win32_##name(const char *filename_utf8) \
     wchar_t *filename_w;                                  \
     int ret;                                              \
                                                           \
-    if (utf8towchar(filename_utf8, &filename_w))          \
+    if (get_extended_win32_path(filename_utf8, &filename_w)) \
         return -1;                                        \
     if (!filename_w)                                      \
         goto fallback;                                    \
@@ -171,37 +188,76 @@  DEF_FS_FUNCTION(unlink, _wunlink, _unlink)
 DEF_FS_FUNCTION(mkdir,  _wmkdir,  _mkdir)
 DEF_FS_FUNCTION(rmdir,  _wrmdir , _rmdir)
 
-#define DEF_FS_FUNCTION2(name, wfunc, afunc, partype)     \
-static inline int win32_##name(const char *filename_utf8, partype par) \
-{                                                         \
-    wchar_t *filename_w;                                  \
-    int ret;                                              \
-                                                          \
-    if (utf8towchar(filename_utf8, &filename_w))          \
-        return -1;                                        \
-    if (!filename_w)                                      \
-        goto fallback;                                    \
-                                                          \
-    ret = wfunc(filename_w, par);                         \
-    av_free(filename_w);                                  \
-    return ret;                                           \
-                                                          \
-fallback:                                                 \
-    /* filename may be be in CP_ACP */                    \
-    return afunc(filename_utf8, par);                     \
+static inline int win32_access(const char *filename_utf8, int par)
+{
+    wchar_t *filename_w;
+    int ret;
+    if (get_extended_win32_path(filename_utf8, &filename_w))
+        return -1;
+    if (!filename_w)
+        goto fallback;
+    ret = _waccess(filename_w, par);
+    av_free(filename_w);
+    return ret;
+fallback:
+    return _access(filename_utf8, par);
+}
+
+static inline void copy_stat(struct _stati64 *winstat, struct win32_stat *par)
+{
+    par->st_dev   = winstat->st_dev;
+    par->st_ino   = winstat->st_ino;
+    par->st_mode  = winstat->st_mode;
+    par->st_nlink = winstat->st_nlink;
+    par->st_uid   = winstat->st_uid;
+    par->st_gid   = winstat->st_gid;
+    par->st_rdev  = winstat->st_rdev;
+    par->st_size  = winstat->st_size;
+    par->st_atime = winstat->st_atime;
+    par->st_mtime = winstat->st_mtime;
+    par->st_ctime = winstat->st_ctime;
 }
 
-DEF_FS_FUNCTION2(access, _waccess, _access, int)
-DEF_FS_FUNCTION2(stat, _wstati64, _stati64, struct stat*)
+static inline int win32_stat(const char *filename_utf8, struct win32_stat *par)
+{
+    struct _stati64 winstat = { 0 };
+    wchar_t *filename_w;
+    int ret;
+
+    if (get_extended_win32_path(filename_utf8, &filename_w))
+        return -1;
+
+    if (filename_w) {
+        ret = _wstat64(filename_w, &winstat);
+        av_free(filename_w);
+    } else
+        ret = _stat64(filename_utf8, &winstat);
+
+    copy_stat(&winstat, par);
+
+    return ret;
+}
+
+static inline int win32_fstat(int fd, struct win32_stat *par)
+{
+    struct _stati64 winstat = { 0 };
+    int ret;
+
+    ret = _fstat64(fd, &winstat);
+
+    copy_stat(&winstat, par);
+
+    return ret;
+}
 
 static inline int win32_rename(const char *src_utf8, const char *dest_utf8)
 {
     wchar_t *src_w, *dest_w;
     int ret;
 
-    if (utf8towchar(src_utf8, &src_w))
+    if (get_extended_win32_path(src_utf8, &src_w))
         return -1;
-    if (utf8towchar(dest_utf8, &dest_w)) {
+    if (get_extended_win32_path(dest_utf8, &dest_w)) {
         av_free(src_w);
         return -1;
     }