diff mbox series

[FFmpeg-devel,v14,4/5] libavformat: Remove MAX_PATH limit and use UTF-8 version of getenv()

Message ID 20220613162626.11541-4-nil-admirari@mailo.com
State New
Headers show
Series [FFmpeg-devel,v14,1/5] libavutil: Add wchartoutf8(), wchartoansi(), utf8toansi() and getenv_utf8() | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Nil Admirari June 13, 2022, 4:26 p.m. UTC
1. getenv() is replaced with getenv_utf8() across libavformat.
2. New versions of AviSynth+ are now called with UTF-8 filenames.
3. Old versions of AviSynth are still using ANSI strings,
   but MAX_PATH limit on filename is removed.
---
 libavformat/avisynth.c    | 39 +++++++++++++++++++++++++++------------
 libavformat/http.c        | 20 +++++++++++++-------
 libavformat/ipfsgateway.c | 35 +++++++++++++++++++++++------------
 libavformat/tls.c         | 11 +++++++++--
 4 files changed, 72 insertions(+), 33 deletions(-)

Comments

Soft Works June 13, 2022, 5:47 p.m. UTC | #1
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Nil
> Admirari
> Sent: Monday, June 13, 2022 6:26 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: [FFmpeg-devel] [PATCH v14 4/5] libavformat: Remove MAX_PATH
> limit and use UTF-8 version of getenv()
> 
> 1. getenv() is replaced with getenv_utf8() across libavformat.
> 2. New versions of AviSynth+ are now called with UTF-8 filenames.
> 3. Old versions of AviSynth are still using ANSI strings,
>    but MAX_PATH limit on filename is removed.
> ---
>  libavformat/avisynth.c    | 39 +++++++++++++++++++++++++++----------
> --
>  libavformat/http.c        | 20 +++++++++++++-------
>  libavformat/ipfsgateway.c | 35 +++++++++++++++++++++++------------
>  libavformat/tls.c         | 11 +++++++++--
>  4 files changed, 72 insertions(+), 33 deletions(-)
> 
> diff --git a/libavformat/avisynth.c b/libavformat/avisynth.c
> index 8ba2bdead2..a97d12b6b6 100644
> --- a/libavformat/avisynth.c
> +++ b/libavformat/avisynth.c
> @@ -34,6 +34,7 @@
>  /* Platform-specific directives. */
>  #ifdef _WIN32
>    #include "compat/w32dlfcn.h"
> +  #include "libavutil/wchar_filename.h"
>    #undef EXTERN_C
>    #define AVISYNTH_LIB "avisynth"
>  #else
> @@ -56,6 +57,7 @@ typedef struct AviSynthLibrary {
>  #define AVSC_DECLARE_FUNC(name) name ## _func name
>      AVSC_DECLARE_FUNC(avs_bit_blt);
>      AVSC_DECLARE_FUNC(avs_clip_get_error);
> +    AVSC_DECLARE_FUNC(avs_check_version);
>      AVSC_DECLARE_FUNC(avs_create_script_environment);
>      AVSC_DECLARE_FUNC(avs_delete_script_environment);
>      AVSC_DECLARE_FUNC(avs_get_audio);
> @@ -137,6 +139,7 @@ static av_cold int avisynth_load_library(void)
> 
>      LOAD_AVS_FUNC(avs_bit_blt, 0);
>      LOAD_AVS_FUNC(avs_clip_get_error, 0);
> +    LOAD_AVS_FUNC(avs_check_version, 0);
>      LOAD_AVS_FUNC(avs_create_script_environment, 0);
>      LOAD_AVS_FUNC(avs_delete_script_environment, 0);
>      LOAD_AVS_FUNC(avs_get_audio, 0);
> @@ -807,26 +810,38 @@ static int
> avisynth_create_stream(AVFormatContext *s)
>  static int avisynth_open_file(AVFormatContext *s)
>  {
>      AviSynthContext *avs = s->priv_data;
> -    AVS_Value arg, val;
> +    AVS_Value val;
>      int ret;
> -#ifdef _WIN32
> -    char filename_ansi[MAX_PATH * 4];
> -    wchar_t filename_wc[MAX_PATH * 4];
> -#endif
> 
>      if (ret = avisynth_context_create(s))
>          return ret;
> 
> +    if (!avs_library.avs_check_version(avs->env, 7)) {

I like the version check. I don't know about all the derivatives
of AviSynth, but I assume you have checked that it's valid for
the common ones (or at least the original non-Plus variant)?

> +        AVS_Value args[] = {
> +            avs_new_value_string(s->url),
> +            avs_new_value_bool(1) // filename is in UTF-8
> +        };
> +        val = avs_library.avs_invoke(avs->env, "Import",
> +                                     avs_new_value_array(args, 2),
> 0);
> +    } else {
> +        AVS_Value arg;
>  #ifdef _WIN32
> -    /* Convert UTF-8 to ANSI code page */
> -    MultiByteToWideChar(CP_UTF8, 0, s->url, -1, filename_wc,
> MAX_PATH * 4);
> -    WideCharToMultiByte(CP_THREAD_ACP, 0, filename_wc, -1,
> filename_ansi,
> -                        MAX_PATH * 4, NULL, NULL);
> -    arg = avs_new_value_string(filename_ansi);
> +        char *filename_ansi;
> +        /* Convert UTF-8 to ANSI code page */
> +        if (utf8toansi(s->url, &filename_ansi)) {

Two ideas came to my mind how this could be done better.
What's actually needed here is not a string conversion, we need
a valid and usable filename, and the function could be more
something like "get_ansi_filename()".

The first thing that this function could do is to convert the
filename to ANSI and right back to UTF-8, then compare the
UTF-8 result with the original UTF-8 string. When both are equal,
we know that the conversion is safe, otherwise we know that it
won't work.

Then, we can use the Win32 API GetShortPathName(), which returns
file and directory names in 8.3 notation which (IIRC) contains
only letters which are valid in the ANSI code page.

8.3 file names do not always exist (depending on system config), 
but it's always worth trying.

Should both of these procedures fail, we could at least output
a useful message, explaining why it doesn't work.

Let me know what you think.

sw
Hendrik Leppkes June 13, 2022, 6:55 p.m. UTC | #2
On Mon, Jun 13, 2022 at 7:47 PM Soft Works <softworkz@hotmail.com> wrote:
>
>
>
> > -----Original Message-----
> > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Nil
> > Admirari
> > Sent: Monday, June 13, 2022 6:26 PM
> > To: ffmpeg-devel@ffmpeg.org
> > Subject: [FFmpeg-devel] [PATCH v14 4/5] libavformat: Remove MAX_PATH
> > limit and use UTF-8 version of getenv()
> >
> > 1. getenv() is replaced with getenv_utf8() across libavformat.
> > 2. New versions of AviSynth+ are now called with UTF-8 filenames.
> > 3. Old versions of AviSynth are still using ANSI strings,
> >    but MAX_PATH limit on filename is removed.
> > ---
> >  libavformat/avisynth.c    | 39 +++++++++++++++++++++++++++----------
> > --
> >  libavformat/http.c        | 20 +++++++++++++-------
> >  libavformat/ipfsgateway.c | 35 +++++++++++++++++++++++------------
> >  libavformat/tls.c         | 11 +++++++++--
> >  4 files changed, 72 insertions(+), 33 deletions(-)
> >
> > diff --git a/libavformat/avisynth.c b/libavformat/avisynth.c
> > index 8ba2bdead2..a97d12b6b6 100644
> > --- a/libavformat/avisynth.c
> > +++ b/libavformat/avisynth.c
> > @@ -34,6 +34,7 @@
> >  /* Platform-specific directives. */
> >  #ifdef _WIN32
> >    #include "compat/w32dlfcn.h"
> > +  #include "libavutil/wchar_filename.h"
> >    #undef EXTERN_C
> >    #define AVISYNTH_LIB "avisynth"
> >  #else
> > @@ -56,6 +57,7 @@ typedef struct AviSynthLibrary {
> >  #define AVSC_DECLARE_FUNC(name) name ## _func name
> >      AVSC_DECLARE_FUNC(avs_bit_blt);
> >      AVSC_DECLARE_FUNC(avs_clip_get_error);
> > +    AVSC_DECLARE_FUNC(avs_check_version);
> >      AVSC_DECLARE_FUNC(avs_create_script_environment);
> >      AVSC_DECLARE_FUNC(avs_delete_script_environment);
> >      AVSC_DECLARE_FUNC(avs_get_audio);
> > @@ -137,6 +139,7 @@ static av_cold int avisynth_load_library(void)
> >
> >      LOAD_AVS_FUNC(avs_bit_blt, 0);
> >      LOAD_AVS_FUNC(avs_clip_get_error, 0);
> > +    LOAD_AVS_FUNC(avs_check_version, 0);
> >      LOAD_AVS_FUNC(avs_create_script_environment, 0);
> >      LOAD_AVS_FUNC(avs_delete_script_environment, 0);
> >      LOAD_AVS_FUNC(avs_get_audio, 0);
> > @@ -807,26 +810,38 @@ static int
> > avisynth_create_stream(AVFormatContext *s)
> >  static int avisynth_open_file(AVFormatContext *s)
> >  {
> >      AviSynthContext *avs = s->priv_data;
> > -    AVS_Value arg, val;
> > +    AVS_Value val;
> >      int ret;
> > -#ifdef _WIN32
> > -    char filename_ansi[MAX_PATH * 4];
> > -    wchar_t filename_wc[MAX_PATH * 4];
> > -#endif
> >
> >      if (ret = avisynth_context_create(s))
> >          return ret;
> >
> > +    if (!avs_library.avs_check_version(avs->env, 7)) {
>
> I like the version check. I don't know about all the derivatives
> of AviSynth, but I assume you have checked that it's valid for
> the common ones (or at least the original non-Plus variant)?
>
> > +        AVS_Value args[] = {
> > +            avs_new_value_string(s->url),
> > +            avs_new_value_bool(1) // filename is in UTF-8
> > +        };
> > +        val = avs_library.avs_invoke(avs->env, "Import",
> > +                                     avs_new_value_array(args, 2),
> > 0);
> > +    } else {
> > +        AVS_Value arg;
> >  #ifdef _WIN32
> > -    /* Convert UTF-8 to ANSI code page */
> > -    MultiByteToWideChar(CP_UTF8, 0, s->url, -1, filename_wc,
> > MAX_PATH * 4);
> > -    WideCharToMultiByte(CP_THREAD_ACP, 0, filename_wc, -1,
> > filename_ansi,
> > -                        MAX_PATH * 4, NULL, NULL);
> > -    arg = avs_new_value_string(filename_ansi);
> > +        char *filename_ansi;
> > +        /* Convert UTF-8 to ANSI code page */
> > +        if (utf8toansi(s->url, &filename_ansi)) {
>
> Two ideas came to my mind how this could be done better.
> What's actually needed here is not a string conversion, we need
> a valid and usable filename, and the function could be more
> something like "get_ansi_filename()".
>
> The first thing that this function could do is to convert the
> the filename to ANSI and right back to UTF-8, then compare the
> UTF-8 result with the original UTF-8 string. When both are equal,
> we know that the conversion is safe, otherwise we know that it
> won't work.
>
> Then, we can use the win32 API GetShortFileName(). Which returns
> file and directory names in 8.3 notation which (IIRC) contains
> only letters which are valid in the ANSI code page.
>

This seems unrelated to this patch, which is about removing the
MAX_PATH limit. The code previously converted UTF-8 to ANSI, and still
does so now, just without the MAX_PATH limit.
Further improvements tangential to this topic can, and should, be
applied independently, and not hold up this patch in discussion-hell
for longer than necessary.

- Hendrik
Soft Works June 13, 2022, 7 p.m. UTC | #3
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Hendrik Leppkes
> Sent: Monday, June 13, 2022 8:55 PM
> To: FFmpeg development discussions and patches <ffmpeg-
> devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH v14 4/5] libavformat: Remove
> MAX_PATH limit and use UTF-8 version of getenv()
> 
> On Mon, Jun 13, 2022 at 7:47 PM Soft Works <softworkz@hotmail.com>
> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Nil
> > > Admirari
> > > Sent: Monday, June 13, 2022 6:26 PM
> > > To: ffmpeg-devel@ffmpeg.org
> > > Subject: [FFmpeg-devel] [PATCH v14 4/5] libavformat: Remove
> MAX_PATH
> > > limit and use UTF-8 version of getenv()
> > >
> > > 1. getenv() is replaced with getenv_utf8() across libavformat.
> > > 2. New versions of AviSynth+ are now called with UTF-8 filenames.
> > > 3. Old versions of AviSynth are still using ANSI strings,
> > >    but MAX_PATH limit on filename is removed.
> > > ---
> > >  libavformat/avisynth.c    | 39 +++++++++++++++++++++++++++------
> ----
> > > --
> > >  libavformat/http.c        | 20 +++++++++++++-------
> > >  libavformat/ipfsgateway.c | 35 +++++++++++++++++++++++----------
> --
> > >  libavformat/tls.c         | 11 +++++++++--
> > >  4 files changed, 72 insertions(+), 33 deletions(-)
> > >
> > > diff --git a/libavformat/avisynth.c b/libavformat/avisynth.c
> > > index 8ba2bdead2..a97d12b6b6 100644
> > > --- a/libavformat/avisynth.c
> > > +++ b/libavformat/avisynth.c
> > > @@ -34,6 +34,7 @@
> > >  /* Platform-specific directives. */
> > >  #ifdef _WIN32
> > >    #include "compat/w32dlfcn.h"
> > > +  #include "libavutil/wchar_filename.h"
> > >    #undef EXTERN_C
> > >    #define AVISYNTH_LIB "avisynth"
> > >  #else
> > > @@ -56,6 +57,7 @@ typedef struct AviSynthLibrary {
> > >  #define AVSC_DECLARE_FUNC(name) name ## _func name
> > >      AVSC_DECLARE_FUNC(avs_bit_blt);
> > >      AVSC_DECLARE_FUNC(avs_clip_get_error);
> > > +    AVSC_DECLARE_FUNC(avs_check_version);
> > >      AVSC_DECLARE_FUNC(avs_create_script_environment);
> > >      AVSC_DECLARE_FUNC(avs_delete_script_environment);
> > >      AVSC_DECLARE_FUNC(avs_get_audio);
> > > @@ -137,6 +139,7 @@ static av_cold int
> avisynth_load_library(void)
> > >
> > >      LOAD_AVS_FUNC(avs_bit_blt, 0);
> > >      LOAD_AVS_FUNC(avs_clip_get_error, 0);
> > > +    LOAD_AVS_FUNC(avs_check_version, 0);
> > >      LOAD_AVS_FUNC(avs_create_script_environment, 0);
> > >      LOAD_AVS_FUNC(avs_delete_script_environment, 0);
> > >      LOAD_AVS_FUNC(avs_get_audio, 0);
> > > @@ -807,26 +810,38 @@ static int
> > > avisynth_create_stream(AVFormatContext *s)
> > >  static int avisynth_open_file(AVFormatContext *s)
> > >  {
> > >      AviSynthContext *avs = s->priv_data;
> > > -    AVS_Value arg, val;
> > > +    AVS_Value val;
> > >      int ret;
> > > -#ifdef _WIN32
> > > -    char filename_ansi[MAX_PATH * 4];
> > > -    wchar_t filename_wc[MAX_PATH * 4];
> > > -#endif
> > >
> > >      if (ret = avisynth_context_create(s))
> > >          return ret;
> > >
> > > +    if (!avs_library.avs_check_version(avs->env, 7)) {
> >
> > I like the version check. I don't know about all the derivatives
> > of AviSynth, but I assume you have checked that it's valid for
> > the common ones (or at least the original non-Plus variant)?
> >
> > > +        AVS_Value args[] = {
> > > +            avs_new_value_string(s->url),
> > > +            avs_new_value_bool(1) // filename is in UTF-8
> > > +        };
> > > +        val = avs_library.avs_invoke(avs->env, "Import",
> > > +                                     avs_new_value_array(args,
> 2),
> > > 0);
> > > +    } else {
> > > +        AVS_Value arg;
> > >  #ifdef _WIN32
> > > -    /* Convert UTF-8 to ANSI code page */
> > > -    MultiByteToWideChar(CP_UTF8, 0, s->url, -1, filename_wc,
> > > MAX_PATH * 4);
> > > -    WideCharToMultiByte(CP_THREAD_ACP, 0, filename_wc, -1,
> > > filename_ansi,
> > > -                        MAX_PATH * 4, NULL, NULL);
> > > -    arg = avs_new_value_string(filename_ansi);
> > > +        char *filename_ansi;
> > > +        /* Convert UTF-8 to ANSI code page */
> > > +        if (utf8toansi(s->url, &filename_ansi)) {
> >
> > Two ideas came to my mind how this could be done better.
> > What's actually needed here is not a string conversion, we need
> > a valid and usable filename, and the function could be more
> > something like "get_ansi_filename()".
> >
> > The first thing that this function could do is to convert the
> > the filename to ANSI and right back to UTF-8, then compare the
> > UTF-8 result with the original UTF-8 string. When both are equal,
> > we know that the conversion is safe, otherwise we know that it
> > won't work.
> >
> > Then, we can use the win32 API GetShortFileName(). Which returns
> > file and directory names in 8.3 notation which (IIRC) contains
> > only letters which are valid in the ANSI code page.
> >
> 
> This seems unrelated to this patch, which is about removing the
> MAX_PATH limit. The code previously converted UTF-8 to ANSI, and
> still
> does so now, just without the MAX_PATH limit.
> Further improvements tangential to this topic can, and should, be
> applied independently, and not hold up this patch in discussion-hell
> for longer than necessary.

It was meant as a suggestion not as an objection. I'm fine with this
patch, just to be clear.

softworkz
Stephen Hutchinson June 13, 2022, 7:07 p.m. UTC | #4
On 6/13/22 2:55 PM, Hendrik Leppkes wrote:
> This seems unrelated to this patch, which is about removing the
> MAX_PATH limit. The code previously converted UTF-8 to ANSI, and still
> does so now, just without the MAX_PATH limit.
> Further improvements tangential to this topic can, and should, be
> applied independently, and not hold up this patch in discussion-hell
> for longer than necessary.
> 

Agreed.  As it stands, if a user finds that they need to open files that 
use non-ANSI characters in their filenames, they can always go into 
their Language settings and turn on UTF-8 for worldwide language 
support, which was just as true before the MAX_PATH-related patches. 
Honestly, Microsoft just needs to stop delaying it and make UTF-8 the 
default, and then we won't have this problem anymore.
Nil Admirari June 13, 2022, 8:53 p.m. UTC | #5
> I like the version check. I don't know about all the derivatives
> of AviSynth, but I assume you have checked that it's valid for
> the common ones (or at least the original non-Plus variant)?

Interface version was changed to 7 in 2020:
https://github.com/AviSynth/AviSynthPlus/commit/40900dc1c54c14ea9f188c7242b88d464d067a44
three years after utf8 was implemented. If I'm not mistaken, there is no way to check
for a particular revision.

> Two ideas came to my mind how this could be done better.
> What's actually needed here is not a string conversion, we need
> a valid and usable filename, and the function could be more
> something like "get_ansi_filename()".
>
> The first thing that this function could do is to convert the
> the filename to ANSI and right back to UTF-8, then compare the
> UTF-8 result with the original UTF-8 string. When both are equal,
> we know that the conversion is safe, otherwise we know that it
> won't work.
>
> Then, we can use the win32 API GetShortFileName(). Which returns
> file and directory names in 8.3 notation which (IIRC) contains
> only letters which are valid in the ANSI code page.
>
> 8.3 file names do not always exist (depending on system config), 
> but it's always worth trying.
>
> Should both of these procedures fail, we could at least output
> a useful message, explaining why it doesn't work.
>
> Let me know what you think.

Too much work for something that was fixed on AviSynth+ side two years ago.
Soft Works June 13, 2022, 9:52 p.m. UTC | #6
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> nil-admirari@mailo.com
> Sent: Monday, June 13, 2022 10:53 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v14 4/5] libavformat: Remove
> MAX_PATH limit and use UTF-8 version of getenv()
> 
> > I like the version check. I don't know about all the derivatives
> > of AviSynth, but I assume you have checked that it's valid for
> > the common ones (or at least the original non-Plus variant)?
> 
> Interface version was changed to 7 in 2020:
> https://github.com/AviSynth/AviSynthPlus/commit/40900dc1c54c14ea9f188
> c7242b88d464d067a44
> three years after utf8 was implemented. If I'm not mistaken, there is
> no way to check
> for a particular revision.
> 
> > Two ideas came to my mind how this could be done better.
> > What's actually needed here is not a string conversion, we need
> > a valid and usable filename, and the function could be more
> > something like "get_ansi_filename()".
> >
> > The first thing that this function could do is to convert the
> > the filename to ANSI and right back to UTF-8, then compare the
> > UTF-8 result with the original UTF-8 string. When both are equal,
> > we know that the conversion is safe, otherwise we know that it
> > won't work.
> >
> > Then, we can use the win32 API GetShortFileName(). Which returns
> > file and directory names in 8.3 notation which (IIRC) contains
> > only letters which are valid in the ANSI code page.
> >
> > 8.3 file names do not always exist (depending on system config),
> > but it's always worth trying.
> >
> > Should both of these procedures fail, we could at least output
> > a useful message, explaining why it doesn't work.
> >
> > Let me know what you think.
> 
> Too much work for something that was fixed on AviSynth+ side two
> years ago.

I wasn't sure how important AviSynth is for you. Until I had given
the hint at the UTF8 option in AviSynth, it seemed to be a high
priority to enable the use of long paths with AviSynth, and what
I was thinking is that this might be of interest because paths 
with non-ANSI chars are a far more frequent case than long paths
(on Windows).

Anyway, as long as people are using a non-ancient version of 
AviSynthPlus, it's all covered by your patch, and I'm totally
fine with this part!

Thanks,
sw
Soft Works June 13, 2022, 10:01 p.m. UTC | #7
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Soft Works
> Sent: Monday, June 13, 2022 11:53 PM
> To: FFmpeg development discussions and patches <ffmpeg-
> devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH v14 4/5] libavformat: Remove
> MAX_PATH limit and use UTF-8 version of getenv()
> 
> 
> 
> > -----Original Message-----
> > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> > nil-admirari@mailo.com
> > Sent: Monday, June 13, 2022 10:53 PM
> > To: ffmpeg-devel@ffmpeg.org
> > Subject: Re: [FFmpeg-devel] [PATCH v14 4/5] libavformat: Remove
> > MAX_PATH limit and use UTF-8 version of getenv()
> >
> > > I like the version check. I don't know about all the derivatives
> > > of AviSynth, but I assume you have checked that it's valid for
> > > the common ones (or at least the original non-Plus variant)?
> >
> > Interface version was changed to 7 in 2020:
> >
> https://github.com/AviSynth/AviSynthPlus/commit/40900dc1c54c14ea9f188
> > c7242b88d464d067a44
> > three years after utf8 was implemented. If I'm not mistaken, there
> is
> > no way to check
> > for a particular revision.
> >
> > > Two ideas came to my mind how this could be done better.
> > > What's actually needed here is not a string conversion, we need
> > > a valid and usable filename, and the function could be more
> > > something like "get_ansi_filename()".
> > >
> > > The first thing that this function could do is to convert the
> > > the filename to ANSI and right back to UTF-8, then compare the
> > > UTF-8 result with the original UTF-8 string. When both are equal,
> > > we know that the conversion is safe, otherwise we know that it
> > > won't work.
> > >
> > > Then, we can use the win32 API GetShortFileName(). Which returns
> > > file and directory names in 8.3 notation which (IIRC) contains
> > > only letters which are valid in the ANSI code page.
> > >
> > > 8.3 file names do not always exist (depending on system config),
> > > but it's always worth trying.
> > >
> > > Should both of these procedures fail, we could at least output
> > > a useful message, explaining why it doesn't work.
> > >
> > > Let me know what you think.
> >
> > Too much work for something that was fixed on AviSynth+ side two
> > years ago.
> 
> I wasn't sure how important AviSynth is for you. Until I had given
> the hint at the UTF8 option in AviSynth, it seemed to be a high
> priority to enable the use of long paths with AviSynth, and what
> I was thinking is that this might be of interest because paths
> with non-ANSI chars are a way more frequent case than long paths
> (on Windows).

Before somebody asks how I came to make such claims, here's 
why: 

As of Win 11, the Explorer still warns or even prevents you from 
creating long paths, but you are free to use any kind of non-ANSI
characters for file and folder names. This does not only apply to 
the file management application, but includes the file open and save
dialogs of all applications that are using the standard dialogs.

sw
diff mbox series

Patch

diff --git a/libavformat/avisynth.c b/libavformat/avisynth.c
index 8ba2bdead2..a97d12b6b6 100644
--- a/libavformat/avisynth.c
+++ b/libavformat/avisynth.c
@@ -34,6 +34,7 @@ 
 /* Platform-specific directives. */
 #ifdef _WIN32
   #include "compat/w32dlfcn.h"
+  #include "libavutil/wchar_filename.h"
   #undef EXTERN_C
   #define AVISYNTH_LIB "avisynth"
 #else
@@ -56,6 +57,7 @@  typedef struct AviSynthLibrary {
 #define AVSC_DECLARE_FUNC(name) name ## _func name
     AVSC_DECLARE_FUNC(avs_bit_blt);
     AVSC_DECLARE_FUNC(avs_clip_get_error);
+    AVSC_DECLARE_FUNC(avs_check_version);
     AVSC_DECLARE_FUNC(avs_create_script_environment);
     AVSC_DECLARE_FUNC(avs_delete_script_environment);
     AVSC_DECLARE_FUNC(avs_get_audio);
@@ -137,6 +139,7 @@  static av_cold int avisynth_load_library(void)
 
     LOAD_AVS_FUNC(avs_bit_blt, 0);
     LOAD_AVS_FUNC(avs_clip_get_error, 0);
+    LOAD_AVS_FUNC(avs_check_version, 0);
     LOAD_AVS_FUNC(avs_create_script_environment, 0);
     LOAD_AVS_FUNC(avs_delete_script_environment, 0);
     LOAD_AVS_FUNC(avs_get_audio, 0);
@@ -807,26 +810,38 @@  static int avisynth_create_stream(AVFormatContext *s)
 static int avisynth_open_file(AVFormatContext *s)
 {
     AviSynthContext *avs = s->priv_data;
-    AVS_Value arg, val;
+    AVS_Value val;
     int ret;
-#ifdef _WIN32
-    char filename_ansi[MAX_PATH * 4];
-    wchar_t filename_wc[MAX_PATH * 4];
-#endif
 
     if (ret = avisynth_context_create(s))
         return ret;
 
+    if (!avs_library.avs_check_version(avs->env, 7)) {
+        AVS_Value args[] = {
+            avs_new_value_string(s->url),
+            avs_new_value_bool(1) // filename is in UTF-8
+        };
+        val = avs_library.avs_invoke(avs->env, "Import",
+                                     avs_new_value_array(args, 2), 0);
+    } else {
+        AVS_Value arg;
 #ifdef _WIN32
-    /* Convert UTF-8 to ANSI code page */
-    MultiByteToWideChar(CP_UTF8, 0, s->url, -1, filename_wc, MAX_PATH * 4);
-    WideCharToMultiByte(CP_THREAD_ACP, 0, filename_wc, -1, filename_ansi,
-                        MAX_PATH * 4, NULL, NULL);
-    arg = avs_new_value_string(filename_ansi);
+        char *filename_ansi;
+        /* Convert UTF-8 to ANSI code page */
+        if (utf8toansi(s->url, &filename_ansi)) {
+            ret = AVERROR_UNKNOWN;
+            goto fail;
+        }
+        arg = avs_new_value_string(filename_ansi);
 #else
-    arg = avs_new_value_string(s->url);
+        arg = avs_new_value_string(s->url);
 #endif
-    val = avs_library.avs_invoke(avs->env, "Import", arg, 0);
+        val = avs_library.avs_invoke(avs->env, "Import", arg, 0);
+#ifdef _WIN32
+        av_free(filename_ansi);
+#endif
+    }
+
     if (avs_is_error(val)) {
         av_log(s, AV_LOG_ERROR, "%s\n", avs_as_error(val));
         ret = AVERROR_UNKNOWN;
diff --git a/libavformat/http.c b/libavformat/http.c
index c8f3f4b6a3..d90117e422 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -29,6 +29,7 @@ 
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/bprint.h"
+#include "libavutil/getenv_utf8.h"
 #include "libavutil/opt.h"
 #include "libavutil/time.h"
 #include "libavutil/parseutils.h"
@@ -198,6 +199,7 @@  void ff_http_init_auth_state(URLContext *dest, const URLContext *src)
 static int http_open_cnx_internal(URLContext *h, AVDictionary **options)
 {
     const char *path, *proxy_path, *lower_proto = "tcp", *local_path;
+    char *env_http_proxy, *env_no_proxy;
     char *hashmark;
     char hostname[1024], hoststr[1024], proto[10];
     char auth[1024], proxyauth[1024] = "";
@@ -211,9 +213,13 @@  static int http_open_cnx_internal(URLContext *h, AVDictionary **options)
                  path1, sizeof(path1), s->location);
     ff_url_join(hoststr, sizeof(hoststr), NULL, NULL, hostname, port, NULL);
 
-    proxy_path = s->http_proxy ? s->http_proxy : getenv("http_proxy");
-    use_proxy  = !ff_http_match_no_proxy(getenv("no_proxy"), hostname) &&
+    env_http_proxy = getenv_utf8("http_proxy");
+    proxy_path = s->http_proxy ? s->http_proxy : env_http_proxy;
+
+    env_no_proxy = getenv_utf8("no_proxy");
+    use_proxy  = !ff_http_match_no_proxy(env_no_proxy, hostname) &&
                  proxy_path && av_strstart(proxy_path, "http://", NULL);
+    av_freep(&env_no_proxy);
 
     if (!strcmp(proto, "https")) {
         lower_proto = "tls";
@@ -224,7 +230,7 @@  static int http_open_cnx_internal(URLContext *h, AVDictionary **options)
         if (s->http_proxy) {
             err = av_dict_set(options, "http_proxy", s->http_proxy, 0);
             if (err < 0)
-                return err;
+                goto end;
         }
     }
     if (port < 0)
@@ -259,12 +265,12 @@  static int http_open_cnx_internal(URLContext *h, AVDictionary **options)
         err = ffurl_open_whitelist(&s->hd, buf, AVIO_FLAG_READ_WRITE,
                                    &h->interrupt_callback, options,
                                    h->protocol_whitelist, h->protocol_blacklist, h);
-        if (err < 0)
-            return err;
     }
 
-    return http_connect(h, path, local_path, hoststr,
-                        auth, proxyauth);
+end:
+    av_freep(&env_http_proxy);
+    return err < 0 ? err : http_connect(
+        h, path, local_path, hoststr, auth, proxyauth);
 }
 
 static int http_should_reconnect(HTTPContext *s, int err)
diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
index 83d52293b4..a8323403f0 100644
--- a/libavformat/ipfsgateway.c
+++ b/libavformat/ipfsgateway.c
@@ -20,6 +20,7 @@ 
  */
 
 #include "libavutil/avstring.h"
+#include "libavutil/getenv_utf8.h"
 #include "libavutil/opt.h"
 #include <sys/stat.h>
 #include "os_support.h"
@@ -55,12 +56,15 @@  static int populate_ipfs_gateway(URLContext *h)
     int stat_ret = 0;
     int ret = AVERROR(EINVAL);
     FILE *gateway_file = NULL;
+    char *env_ipfs_gateway, *env_ipfs_path;
 
     // Test $IPFS_GATEWAY.
-    if (getenv("IPFS_GATEWAY") != NULL) {
-        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
-                     getenv("IPFS_GATEWAY"))
-            >= sizeof(c->gateway_buffer)) {
+    env_ipfs_gateway = getenv_utf8("IPFS_GATEWAY");
+    if (env_ipfs_gateway != NULL) {
+        int printed = snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
+                               "%s", env_ipfs_gateway);
+        av_freep(&env_ipfs_gateway);
+        if (printed >= sizeof(c->gateway_buffer)) {
             av_log(h, AV_LOG_WARNING,
                    "The IPFS_GATEWAY environment variable "
                    "exceeds the maximum length. "
@@ -77,20 +81,25 @@  static int populate_ipfs_gateway(URLContext *h)
 
     // We need to know the IPFS folder to - eventually - read the contents of
     // the "gateway" file which would tell us the gateway to use.
-    if (getenv("IPFS_PATH") == NULL) {
+    env_ipfs_path = getenv_utf8("IPFS_PATH");
+    if (env_ipfs_path == NULL) {
+        char *env_home = getenv_utf8("HOME");
+
         av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
 
         // Try via the home folder.
-        if (getenv("HOME") == NULL) {
+        if (env_home == NULL) {
             av_log(h, AV_LOG_WARNING, "$HOME appears to be empty.\n");
             ret = AVERROR(EINVAL);
             goto err;
         }
 
         // Verify the composed path fits.
-        if (snprintf(ipfs_full_data_folder, sizeof(ipfs_full_data_folder),
-                     "%s/.ipfs/", getenv("HOME"))
-            >= sizeof(ipfs_full_data_folder)) {
+        int printed = snprintf(
+            ipfs_full_data_folder, sizeof(ipfs_full_data_folder),
+            "%s/.ipfs/", env_home);
+        av_freep(&env_home);
+        if (printed >= sizeof(ipfs_full_data_folder)) {
             av_log(h, AV_LOG_WARNING,
                    "The IPFS data path exceeds the "
                    "max path length (%zu)\n",
@@ -113,9 +122,11 @@  static int populate_ipfs_gateway(URLContext *h)
             goto err;
         }
     } else {
-        if (snprintf(ipfs_full_data_folder, sizeof(ipfs_full_data_folder), "%s",
-                     getenv("IPFS_PATH"))
-            >= sizeof(ipfs_full_data_folder)) {
+        int printed = snprintf(
+            ipfs_full_data_folder, sizeof(ipfs_full_data_folder),
+            "%s", env_ipfs_path);
+        av_freep(&env_ipfs_path);
+        if (printed >= sizeof(ipfs_full_data_folder)) {
             av_log(h, AV_LOG_WARNING,
                    "The IPFS_PATH environment variable "
                    "exceeds the maximum length. "
diff --git a/libavformat/tls.c b/libavformat/tls.c
index 302c0f8d59..ea68f18d3a 100644
--- a/libavformat/tls.c
+++ b/libavformat/tls.c
@@ -26,6 +26,7 @@ 
 #include "url.h"
 #include "tls.h"
 #include "libavutil/avstring.h"
+#include "libavutil/getenv_utf8.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 
@@ -60,6 +61,7 @@  int ff_tls_open_underlying(TLSShared *c, URLContext *parent, const char *uri, AV
     char buf[200], opts[50] = "";
     struct addrinfo hints = { 0 }, *ai = NULL;
     const char *proxy_path;
+    char *env_http_proxy, *env_no_proxy;
     int use_proxy;
 
     set_options(c, uri);
@@ -89,9 +91,13 @@  int ff_tls_open_underlying(TLSShared *c, URLContext *parent, const char *uri, AV
     if (!c->host && !(c->host = av_strdup(c->underlying_host)))
         return AVERROR(ENOMEM);
 
-    proxy_path = c->http_proxy ? c->http_proxy : getenv("http_proxy");
-    use_proxy = !ff_http_match_no_proxy(getenv("no_proxy"), c->underlying_host) &&
+    env_http_proxy = getenv_utf8("http_proxy");
+    proxy_path = c->http_proxy ? c->http_proxy : env_http_proxy;
+
+    env_no_proxy = getenv_utf8("no_proxy");
+    use_proxy = !ff_http_match_no_proxy(env_no_proxy, c->underlying_host) &&
                 proxy_path && av_strstart(proxy_path, "http://", NULL);
+    av_freep(&env_no_proxy);
 
     if (use_proxy) {
         char proxy_host[200], proxy_auth[200], dest[200];
@@ -104,6 +110,7 @@  int ff_tls_open_underlying(TLSShared *c, URLContext *parent, const char *uri, AV
                     proxy_port, "/%s", dest);
     }
 
+    av_freep(&env_http_proxy);
     return ffurl_open_whitelist(&c->tcp, buf, AVIO_FLAG_READ_WRITE,
                                 &parent->interrupt_callback, options,
                                 parent->protocol_whitelist, parent->protocol_blacklist, parent);