diff mbox series

[FFmpeg-devel,09/10] lavf/file: handle standard file:// URLs.

Message ID 20210727144813.452917-10-george@nsup.org
State New
Headers show
Series [FFmpeg-devel,01/10] lavu/internal: add hex to int functions. | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Nicolas George July 27, 2021, 2:48 p.m. UTC
Signed-off-by: Nicolas George <george@nsup.org>
---
 libavformat/file.c | 54 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

Comments

Marton Balint July 27, 2021, 8:56 p.m. UTC | #1
On Tue, 27 Jul 2021, Nicolas George wrote:

> Signed-off-by: Nicolas George <george@nsup.org>
> ---
> libavformat/file.c | 54 +++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 53 insertions(+), 1 deletion(-)

You should mention the relevant rfc - RFC8089.

Also there are a couple of common deviations/extensions from the 
normative standard, referenced in the RFC:

https://datatracker.ietf.org/doc/html/rfc8089#appendix-F

Do you plan to support those?

>
> diff --git a/libavformat/file.c b/libavformat/file.c
> index 2fb93c23fd..82d9e7bab4 100644
> --- a/libavformat/file.c
> +++ b/libavformat/file.c
> @@ -20,6 +20,7 @@
>  */
>
> #include "libavutil/avstring.h"
> +#include "libavutil/bprint.h"
> #include "libavutil/internal.h"
> #include "libavutil/opt.h"
> #include "avformat.h"
> @@ -355,10 +356,61 @@ static int file_close_dir(URLContext *h)
>
> #if CONFIG_FILE_PROTOCOL
>
> +/**
> + * De-escape %hh. Return 0 if no de-escaping needed.
> + */
> +static int url_de_escape(void *log, const char *filename, AVBPrint *out)
> +{
> +    const char *in;
> +
> +    for (in = filename; *in; in++)
> +        if (*in == '%' || *in == '#' || *in == '?')
> +            break;
> +    if (!*in)
> +        return 0;
> +    for (in = filename; *in; in++) {
> +        if (*in == '#' || *in == '?')
> +            break;
> +        if (*in == '%') {
> +            int a = ff_hexpair2int(in + 1);
> +            if (a < 0) {
> +                av_log(log, AV_LOG_ERROR, "Invalid %% char in URL.\n");
> +                return AVERROR(EINVAL);
> +            }
> +            av_bprint_chars(out, a, 1);
> +            in += 2;
> +        } else {
> +            av_bprint_chars(out, *in, 1);
> +        }
> +    }
> +    if (!av_bprint_is_complete(out))
> +        return AVERROR(ENOMEM);
> +    return 1;
> +}
> +
> static int file_open(URLContext *h, const char *filename, int flags)
> {
> +    AVBPrint decoded;
> +    int ret;
> +
>     av_strstart(filename, "file:", &filename);
> -    return file_open_common(h, filename, flags);
> +    av_bprint_init(&decoded, 1, AV_BPRINT_SIZE_UNLIMITED);
> +    if (filename[0] == '/' && filename[1] == '/' && filename[2] == '/') {
> +        filename += 2;
> +        ret = url_de_escape(h, filename, &decoded);
> +        if (ret < 0) {
> +            av_bprint_finalize(&decoded, NULL);
> +            return ret;
> +        }
> +        if (ret)
> +            filename = decoded.str;
> +    } else {
> +        av_log(h, AV_LOG_WARNING,
> +               "'file:path' is deprecated, use 'fs:path' or a standard 'file:///' URL\n");

Well, I guess this is needed for compatiblity, but as far as I see, the 
RFC supports the minimal repepresentation of local file path, so RFC-wise 
this is also valid. So what is the plan here? We will do unescaping and 
remove the deperecation message somewhere in the future?

What about the file://host/file syntax which - at least on Windows - 
should be transformed to //host/file?

Also the question of relative resolution comes to mind, there is some code 
in libavformat/url.c which also handles file path specially, that should 
also be updated, right?

Thanks,
Marton

> +    }
> +    ret = file_open_common(h, filename, flags);
> +    av_bprint_finalize(&decoded, NULL);
> +    return ret;
> }
>
> const URLProtocol ff_file_protocol = {
> -- 
> 2.30.2
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Nicolas George July 28, 2021, 9:16 a.m. UTC | #2
Marton Balint (12021-07-27):
> You should mention the relevant rfc - RFC8089.

Locally added.

> Also there are a couple of common deviations/extensions from the normative
> standard, referenced in the RFC:
> 
> https://datatracker.ietf.org/doc/html/rfc8089#appendix-F
> 
> Do you plan to support those?

No. But at a quick glance, nothing is required to support them.

> Well, I guess this is needed for compatiblity, but as far as I see, the RFC
> supports the minimal repepresentation of local file path, so RFC-wise this
> is also valid. So what is the plan here? We will do unescaping and remove
> the deperecation message somewhere in the future?

I know the RFC accepts it, but honestly, semi-relative URLs, where the
scheme is specified, but then a relative path is used, do not make any
sense, especially for the file scheme but really for all schemes.

While standard absolute file:// URLs do happen quite easily (file
choosers can produce one; I had to implement a work-around in my
application), the odds that we meet a semi-relative one is zero.

On the other hand, we need backward compatibility. So I think reserving
this case for compatibility is completely ok.

> What about the file://host/file syntax which - at least on Windows - should
> be transformed to //host/file?

I do not intend to implement this, but if somebody wants to, the double
slash is still enough to distinguish from the compatibility case, so we
will be good.

> Also the question of relative resolution comes to mind, there is some code
> in libavformat/url.c which also handles file path specially, that should
> also be updated, right?

Good catch. I added this patch in the series. It passes FATE.

I will re-send the full series after giving more time for comments.

Regards,
Marton Balint July 29, 2021, 9:14 p.m. UTC | #3
On Wed, 28 Jul 2021, Nicolas George wrote:

> Marton Balint (12021-07-27):
>> You should mention the relevant rfc - RFC8089.
>
> Locally added.
>
>> Also there are a couple of common deviations/extensions from the normative
>> standard, referenced in the RFC:
>>
>> https://datatracker.ietf.org/doc/html/rfc8089#appendix-F
>>
>> Do you plan to support those?
>
> No. But at a quick glance, nothing is required to support them.
>
>> Well, I guess this is needed for compatiblity, but as far as I see, the RFC
>> supports the minimal repepresentation of local file path, so RFC-wise this
>> is also valid. So what is the plan here? We will do unescaping and remove
>> the deperecation message somewhere in the future?
>
> I know the RFC accepts it, but honestly, semi-relative URLs, where the
> scheme is specified, but then a relative path is used, do not make any
> sense, especially for the file scheme but really for all schemes.

I meant the absolute path case from Appendix B:

    o  The minimal representation of a local file with no authority field
       and an absolute path that begins with a slash "/".  For example:

       *  "file:/path/to/file"

>
> While standard absolute file:// URLs do happen quite easily (file
> choosers can produce one; I had to implement a work-around in my
> application), the odds that we meet a semi-relative one is zero.
>
> On the other hand, we need backward compatibility. So I think reserving
> this case for compatibility is completely ok.

I can agree with this, but all the deviations from the normative RFC 
should be explicitly documented.

>
>> What about the file://host/file syntax which - at least on Windows - should
>> be transformed to //host/file?
>
> I do not intend to implement this, but if somebody wants to, the double
> slash is still enough to distinguish from the compatibility case, so we
> will be good.

Well, it will break compatiblity for cases like
file://server/share/100%.txt because now percent encoding will be 
assumed...

Maybe as the first step percent encoding should only be used for the 
3-slash form, and the deprecation warning should be printed for the 
less-than-3-slash form. (People wanting percent encoding may use the 
4-slash form which is also a thing...)

And after 2 years of deprecation period the warning should be removed and 
percent encoded form can be used for all the cases.

This way eventually the file: handling can be fully RFC compliant.

>
>> Also the question of relative resolution comes to mind, there is some code
>> in libavformat/url.c which also handles file path specially, that should
>> also be updated, right?
>
> Good catch. I added this patch in the series. It passes FATE.
>
> I will re-send the full series after giving more time for comments.
>
> Regards,
>

Thanks,
Marton
Nicolas George Aug. 14, 2021, 8:17 a.m. UTC | #4
Marton Balint (12021-07-29):
> I meant the absolute path case from Appendix B:
> 
>    o  The minimal representation of a local file with no authority field
>       and an absolute path that begins with a slash "/".  For example:
> 
>       *  "file:/path/to/file"

It is precisely what I call semi-relative. Do you know of any software
that outputs this kind of URL?

I know software that produces file:/// URLs with percent encoding, so
FFmpeg needs to accept them. But file:/ with percent encoding, I do not,
and therefore we have freedom over them.

> Well, it will break compatiblity for cases like
> file://server/share/100%.txt because now percent encoding will be assumed...
> 
> Maybe as the first step percent encoding should only be used for the 3-slash
> form, and the deprecation warning should be printed for the
> less-than-3-slash form. (People wanting percent encoding may use the 4-slash
> form which is also a thing...)

Indeed. Or we can make a smarted detection: check if there are percents
and they are followed by hex digits.

> And after 2 years of deprecation period the warning should be removed and
> percent encoded form can be used for all the cases.
> 
> This way eventually the file: handling can be fully RFC compliant.

This is the ultimate goal, of course.

Regards,
diff mbox series

Patch

diff --git a/libavformat/file.c b/libavformat/file.c
index 2fb93c23fd..82d9e7bab4 100644
--- a/libavformat/file.c
+++ b/libavformat/file.c
@@ -20,6 +20,7 @@ 
  */
 
 #include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "avformat.h"
@@ -355,10 +356,61 @@  static int file_close_dir(URLContext *h)
 
 #if CONFIG_FILE_PROTOCOL
 
+/**
+ * De-escape %hh. Return 0 if no de-escaping needed.
+ */
+static int url_de_escape(void *log, const char *filename, AVBPrint *out)
+{
+    const char *in;
+
+    for (in = filename; *in; in++)
+        if (*in == '%' || *in == '#' || *in == '?')
+            break;
+    if (!*in)
+        return 0;
+    for (in = filename; *in; in++) {
+        if (*in == '#' || *in == '?')
+            break;
+        if (*in == '%') {
+            int a = ff_hexpair2int(in + 1);
+            if (a < 0) {
+                av_log(log, AV_LOG_ERROR, "Invalid %% char in URL.\n");
+                return AVERROR(EINVAL);
+            }
+            av_bprint_chars(out, a, 1);
+            in += 2;
+        } else {
+            av_bprint_chars(out, *in, 1);
+        }
+    }
+    if (!av_bprint_is_complete(out))
+        return AVERROR(ENOMEM);
+    return 1;
+}
+
 static int file_open(URLContext *h, const char *filename, int flags)
 {
+    AVBPrint decoded;
+    int ret;
+
     av_strstart(filename, "file:", &filename);
-    return file_open_common(h, filename, flags);
+    av_bprint_init(&decoded, 1, AV_BPRINT_SIZE_UNLIMITED);
+    if (filename[0] == '/' && filename[1] == '/' && filename[2] == '/') {
+        filename += 2;
+        ret = url_de_escape(h, filename, &decoded);
+        if (ret < 0) {
+            av_bprint_finalize(&decoded, NULL);
+            return ret;
+        }
+        if (ret)
+            filename = decoded.str;
+    } else {
+        av_log(h, AV_LOG_WARNING,
+               "'file:path' is deprecated, use 'fs:path' or a standard 'file:///' URL\n");
+    }
+    ret = file_open_common(h, filename, flags);
+    av_bprint_finalize(&decoded, NULL);
+    return ret;
 }
 
 const URLProtocol ff_file_protocol = {