diff mbox series

[FFmpeg-devel,v10,1/1] avformat: Add IPFS protocol support.

Message ID 20220330114039.69510-2-markg85@gmail.com
State New
Headers show
Series Add IPFS protocol support. | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_aarch64_jetson success Make finished
andriy/make_fate_aarch64_jetson success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished

Commit Message

Mark Gaiser March 30, 2022, 11:40 a.m. UTC
This patch adds support for:
- ffplay ipfs://<cid>
- ffplay ipns://<cid>

IPFS data can be played from so called "ipfs gateways".
A gateway is essentially a webserver that gives access to the
distributed IPFS network.

This protocol support (ipfs and ipns) therefore translates
ipfs:// and ipns:// to a http:// url. This resulting url is
then handled by the http protocol. It could also be https
depending on the gateway provided.

To use this protocol, a gateway must be provided.
If you do nothing it will try to find it in your
$HOME/.ipfs/gateway file. The ways to set it manually are:
1. Define a -gateway <url> to the gateway.
2. Define $IPFS_GATEWAY with the full http link to the gateway.
3. Define $IPFS_PATH and point it to the IPFS data path.
4. Have IPFS running in your local user folder (under $HOME/.ipfs).

Signed-off-by: Mark Gaiser <markg85@gmail.com>
---
 configure                 |   2 +
 doc/protocols.texi        |  30 ++++
 libavformat/Makefile      |   2 +
 libavformat/ipfsgateway.c | 309 ++++++++++++++++++++++++++++++++++++++
 libavformat/protocols.c   |   2 +
 5 files changed, 345 insertions(+)
 create mode 100644 libavformat/ipfsgateway.c

Comments

Andreas Rheinhardt March 30, 2022, 12:20 p.m. UTC | #1
Mark Gaiser:
> This patch adds support for:
> - ffplay ipfs://<cid>
> - ffplay ipns://<cid>
> 
> IPFS data can be played from so called "ipfs gateways".
> A gateway is essentially a webserver that gives access to the
> distributed IPFS network.
> 
> This protocol support (ipfs and ipns) therefore translates
> ipfs:// and ipns:// to a http:// url. This resulting url is
> then handled by the http protocol. It could also be https
> depending on the gateway provided.
> 
> To use this protocol, a gateway must be provided.
> If you do nothing it will try to find it in your
> $HOME/.ipfs/gateway file. The ways to set it manually are:
> 1. Define a -gateway <url> to the gateway.
> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
> 3. Define $IPFS_PATH and point it to the IPFS data path.
> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
> 
> Signed-off-by: Mark Gaiser <markg85@gmail.com>
> ---
>  configure                 |   2 +
>  doc/protocols.texi        |  30 ++++
>  libavformat/Makefile      |   2 +
>  libavformat/ipfsgateway.c | 309 ++++++++++++++++++++++++++++++++++++++
>  libavformat/protocols.c   |   2 +
>  5 files changed, 345 insertions(+)
>  create mode 100644 libavformat/ipfsgateway.c
> 
> diff --git a/configure b/configure
> index e4d36aa639..55af90957a 100755
> --- a/configure
> +++ b/configure
> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
>  udplite_protocol_select="network"
>  unix_protocol_deps="sys_un_h"
>  unix_protocol_select="network"
> +ipfs_protocol_select="https_protocol"
> +ipns_protocol_select="https_protocol"
>  
>  # external library protocols
>  libamqp_protocol_deps="librabbitmq"
> diff --git a/doc/protocols.texi b/doc/protocols.texi
> index d207df0b52..7c9c0a4808 100644
> --- a/doc/protocols.texi
> +++ b/doc/protocols.texi
> @@ -2025,5 +2025,35 @@ decoding errors.
>  
>  @end table
>  
> +@section ipfs
> +
> +InterPlanetary File System (IPFS) protocol support. One can access files stored 
> +on the IPFS network through so called gateways. Those are http(s) endpoints.
> +This protocol wraps the IPFS native protocols (ipfs:// and ipns://) to be send 
> +to such a gateway. Users can (and should) host their own node which means this 
> +protocol will use your local machine gateway to access files on the IPFS network.
> +
> +If a user doesn't have a node of their own then the public gateway dweb.link is 
> +used by default.
> +
> +You can use this protocol in 2 ways. Using IPFS:
> +@example
> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> +@end example
> +
> +Or the IPNS protocol (IPNS is mutable IPFS):
> +@example
> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> +@end example
> +
> +You can also change the gateway to be used:
> +
> +@table @option
> +
> +@item gateway
> +Defines the gateway to use. When nothing is provided the protocol will first try 
> +your local gateway. If that fails dweb.link will be used.
> +
> +@end table
>  
>  @c man end PROTOCOLS
> diff --git a/libavformat/Makefile b/libavformat/Makefile
> index d7182d6bd8..e3233fd7ac 100644
> --- a/libavformat/Makefile
> +++ b/libavformat/Makefile
> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             += srtpproto.o srtp.o
>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
> new file mode 100644
> index 0000000000..1a039589c0
> --- /dev/null
> +++ b/libavformat/ipfsgateway.c
> @@ -0,0 +1,309 @@
> +/*
> + * IPFS and IPNS protocol support through IPFS Gateway.
> + * Copyright (c) 2022 Mark Gaiser
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avformat.h"
> +#include "libavutil/avassert.h"

Unused.

> +#include "libavutil/avstring.h"
> +#include "libavutil/internal.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/tree.h"

?

> +#include <fcntl.h>
> +#if HAVE_IO_H
> +#include <io.h>
> +#endif
> +#if HAVE_UNISTD_H
> +#include <unistd.h>
> +#endif
> +#include "os_support.h"
> +#include "url.h"
> +#include <stdlib.h>
> +#include <sys/stat.h>
> +
> +typedef struct IPFSGatewayContext {
> +    AVClass *class;
> +    URLContext *inner;
> +    // Is filled by the -gateway argument and not changed after.
> +    char *gateway;
> +    // If the above gateway is non null, it will be copied into this buffer.
> +    // Else this buffer will contain the auto detected gateway.
> +    // In either case, the gateway to use will be in this buffer.
> +    char gateway_buffer[PATH_MAX];
> +} IPFSGatewayContext;
> +
> +// A best-effort way to find the IPFS gateway.
> +// Only the most appropiate gateway is set. It's not actually requested
> +// (http call) to prevent a potential slowdown in startup. A potential timeout
> +// is handled by the HTTP protocol.
> +static int populate_ipfs_gateway(URLContext *h)
> +{
> +    IPFSGatewayContext *c = h->priv_data;
> +    char ipfs_full_data_folder[PATH_MAX];
> +    char ipfs_gateway_file[PATH_MAX];
> +    struct stat st;
> +    int stat_ret = 0;
> +    int ret = AVERROR(EINVAL);
> +    FILE *gateway_file = NULL;
> +
> +    // Test $IPFS_GATEWAY.
> +    if (getenv("IPFS_GATEWAY") != NULL) {
> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
> +                     getenv("IPFS_GATEWAY")) >= sizeof(c->gateway_buffer)) {
> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment variable exceeds the maximum length. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
> +            ret = AVERROR(EINVAL);
> +            goto err;
> +        }
> +
> +        ret = 1;
> +        goto err;
> +    } else
> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
> +
> +    // We need to know the IPFS folder to - eventually - read the contents of
> +    // the "gateway" file which would tell us the gateway to use.
> +    if (getenv("IPFS_PATH") == NULL) {
> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
> +
> +        // Try via the home folder.
> +        if (getenv("HOME") == NULL) {
> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be empty.\n");
> +            ret = AVERROR(EINVAL);
> +            goto err;
> +        }
> +
> +        // Verify the composed path fits.
> +        if (snprintf(ipfs_full_data_folder, sizeof(ipfs_full_data_folder),
> +                     "%s/.ipfs/", getenv("HOME")) >= sizeof(ipfs_full_data_folder)) {
> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds the max path length (%zu)\n", sizeof(ipfs_full_data_folder));
> +            ret = AVERROR(EINVAL);
> +            goto err;
> +        }
> +
> +        // Stat the folder.
> +        // It should exist in a default IPFS setup when run as local user.
> +#ifndef _WIN32
> +        stat_ret = stat(ipfs_full_data_folder, &st);
> +#else
> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
> +#endif
> +        if (stat_ret < 0) {
> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We tried:\n");
> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was empty.\n");
> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s) which doesn't exist.\n", ipfs_full_data_folder);
> +            ret = AVERROR(ENOENT);
> +            goto err;
> +        }
> +    } else {
> +        if (snprintf(ipfs_full_data_folder, sizeof(ipfs_full_data_folder), "%s",
> +                 getenv("IPFS_PATH")) >= sizeof(ipfs_full_data_folder)) {
> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment variable exceeds the maximum length. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
> +            ret = AVERROR(EINVAL);
> +            goto err;
> +        }
> +
> +    }
> +
> +    // Copy the fully composed gateway path into ipfs_gateway_file.
> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file), "%sgateway",
> +                 ipfs_full_data_folder) >= sizeof(ipfs_gateway_file)) {
> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path exceeds the max path length (%zu)\n", sizeof(ipfs_gateway_file));
> +        ret = AVERROR(ENOENT);
> +        goto err;
> +    }
> +
> +    // Get the contents of the gateway file.
> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
> +    if (!gateway_file) {
> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri: %s) doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
> +        ret = AVERROR(ENOENT);
> +        goto err;
> +    }
> +
> +    // Read a single line (fgets stops at new line mark).
> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1, gateway_file);
> +
> +    // Replace the last char with \0
> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
> +
> +    // Replace first occurence of end of line with \0
> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;

If the buffer contains both \r and \n and the first \n precedes the
first \r, then the above zeroes both the first \r and the first \n. If
it is enough to only zero the first newline, then this can be simplified
to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".

> +
> +    // If strlen finds anything longer then 0 characters then we have a
> +    // potential gateway url.
> +    if (strlen(c->gateway_buffer) < 1) {

if (*c->gateway_buffer == '\0')

> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri: %s) appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
> +        ret = AVERROR(EILSEQ);
> +        goto err;
> +    } else {
> +        // We're done, the c->gateway_buffer has something that looks valid.
> +        ret = 1;
> +        goto err;
> +    }
> +
> +err:
> +    if (gateway_file)
> +        fclose(gateway_file);
> +
> +    return ret;
> +}
> +
> +static int translate_ipfs_to_http(URLContext *h, const char *uri,
> +                                  int flags, AVDictionary **options)
> +{
> +    const char *ipfs_cid;
> +    char *fulluri = NULL;
> +    int ret;
> +    IPFSGatewayContext *c = h->priv_data;
> +
> +    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is stripped from
> +    // the string leaving just the CID in ipfs_cid.
> +    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
> +    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
> +
> +    // We must have either ipns or ipfs.
> +    if (!is_ipfs && !is_ipns) {
> +        ret = AVERROR(EINVAL);
> +        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
> +        goto err;
> +    }
> +
> +    // If the CID has a length greater then 0 then we assume we have a proper working one.
> +    // It could still be wrong but in that case the gateway should save us and
> +    // ruturn a 403 error. The http protocol handles this.
> +    if (strlen(ipfs_cid) < 1) {
> +        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
> +        ret = AVERROR(EILSEQ);
> +        goto err;
> +    }
> +
> +    // Populate c->gateway_buffer with whatever is in c->gateway
> +    if (c->gateway != NULL) {
> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
> +                     c->gateway) >= sizeof(c->gateway_buffer)) {
> +            av_log(h, AV_LOG_WARNING, "The -gateway parameter is too long. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));

We typically use SIZE_SPECIFIER instead of z for compatibility with
ancient versions of MSVC.
(I don't know whether there is any supported version of MSVC that
doesn't support z; I don't use MSVC myself.)

> +            ret = AVERROR(EINVAL);
> +            goto err;
> +        }
> +    } else {
> +        // Populate the IPFS gateway if we have any.
> +        // If not, inform the user how to properly set one.
> +        ret = populate_ipfs_gateway(h);
> +
> +        if (ret < 1) {
> +            // We fallback on dweb.link (managed by Protocol Labs).
> +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "https://dweb.link");
> +
> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be running. You’re now using the public gateway at dweb.link.\n");
> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is recommended to improve performance and reliability, and not share all your activity with a single IPFS gateway.\n");
> +            av_log(h, AV_LOG_INFO, "There are multiple options to define this gateway.\n");
> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway param, without a trailing slash: -gateway <url>.\n");
> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY environment variable with the full HTTP URL to the gateway without trailing forward slash.\n");
> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH environment variable and point it to the IPFS data path - this is typically ~/.ipfs\n");

All those AV_LOG_INFO can be combined which has the advantage that the
logs can't be teared apart (which they can now if something else logs at
the same time); furthermore, this would also decrease codesize.

> +        }
> +    }
> +
> +    // Test if the gateway starts with either http:// or https://
> +    if (av_stristart(c->gateway_buffer, "http://", NULL) == 0
> +        && av_stristart(c->gateway_buffer, "https://", NULL) == 0) {
> +        av_log(h, AV_LOG_WARNING, "The gateway URL didn't start with http:// or https:// and is therefore invalid.\n");
> +        ret = AVERROR(EILSEQ);
> +        goto err;
> +    }
> +
> +    // Concatenate the url.
> +    // This ends up with something like: http://localhost:8080/ipfs/Qm.....
> +    // The format of "%s%s%s%s" is the following:
> +    // 1st %s = The gateway.
> +    // 2nd %s = If the gateway didn't end in a slash, add a "/". Otherwise it's an empty string
> +    // 3rd %s = Either ipns/ or ipfs/.
> +    // 4th %s = The IPFS CID (Qm..., bafy..., ...).
> +    fulluri = av_asprintf("%s%s%s%s",
> +                          c->gateway_buffer,
> +                          (c->gateway_buffer[strlen(c->gateway_buffer) - 1] == '/') ? "" : "/",
> +                          (is_ipns) ? "ipns/" : "ipfs/",
> +                          ipfs_cid);

Missing allocation check.

> +
> +    // Pass the URL back to FFMpeg's protocol handler.
> +    if ((ret = ffurl_open_whitelist(&c->inner, fulluri, flags,
> +                                    &h->interrupt_callback, options,
> +                                    h->protocol_whitelist,
> +                                    h->protocol_blacklist, h))
> +        < 0) {

Weird formatting; why don't you just use
ret = ffurl_open_whitelist(...);
if (ret < 0) {

> +        av_log(h, AV_LOG_WARNING, "Unable to open resource: %s\n", fulluri);
> +        goto err;
> +    }
> +
> +err:
> +    av_free(fulluri);
> +    return ret;
> +}
> +
> +static int ipfs_read(URLContext *h, unsigned char *buf, int size)
> +{
> +    IPFSGatewayContext *c = h->priv_data;
> +    return ffurl_read(c->inner, buf, size);
> +}
> +
> +static int64_t ipfs_seek(URLContext *h, int64_t pos, int whence)
> +{
> +    IPFSGatewayContext *c = h->priv_data;
> +    return ffurl_seek(c->inner, pos, whence);
> +}
> +
> +static int ipfs_close(URLContext *h)
> +{
> +    IPFSGatewayContext *c = h->priv_data;
> +    return ffurl_closep(&c->inner);
> +}
> +
> +#define OFFSET(x) offsetof(IPFSGatewayContext, x)
> +
> +static const AVOption options[] = {
> +    {"gateway", "The gateway to ask for IPFS data.", OFFSET(gateway), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM},
> +    {NULL},
> +};
> +
> +static const AVClass ipfs_context_class = {
> +    .class_name   = "IPFS",
> +    .item_name    = av_default_item_name,
> +    .option       = options,
> +    .version      = LIBAVUTIL_VERSION_INT,
> +};
> +
> +const URLProtocol ff_ipfs_protocol = {
> +    .name             = "ipfs",
> +    .url_open2        = translate_ipfs_to_http,
> +    .url_read         = ipfs_read,
> +    .url_seek         = ipfs_seek,
> +    .url_close        = ipfs_close,
> +    .priv_data_size   = sizeof(IPFSGatewayContext),
> +    .priv_data_class  = &ipfs_context_class,
> +};
> +
> +const URLProtocol ff_ipns_protocol = {
> +    .name             = "ipns",
> +    .url_open2        = translate_ipfs_to_http,
> +    .url_read         = ipfs_read,
> +    .url_seek         = ipfs_seek,
> +    .url_close        = ipfs_close,
> +    .priv_data_size   = sizeof(IPFSGatewayContext),
> +    .priv_data_class  = &ipfs_context_class,
> +};
> diff --git a/libavformat/protocols.c b/libavformat/protocols.c
> index d07563cd0c..6ee62a598a 100644
> --- a/libavformat/protocols.c
> +++ b/libavformat/protocols.c
> @@ -71,6 +71,8 @@ extern const URLProtocol ff_libsrt_protocol;
>  extern const URLProtocol ff_libssh_protocol;
>  extern const URLProtocol ff_libsmbclient_protocol;
>  extern const URLProtocol ff_libzmq_protocol;
> +extern const URLProtocol ff_ipfs_protocol;
> +extern const URLProtocol ff_ipns_protocol;
>  
>  #include "libavformat/protocol_list.c"
>
Mark Gaiser March 30, 2022, 1:03 p.m. UTC | #2
On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Mark Gaiser:
> > This patch adds support for:
> > - ffplay ipfs://<cid>
> > - ffplay ipns://<cid>
> >
> > IPFS data can be played from so called "ipfs gateways".
> > A gateway is essentially a webserver that gives access to the
> > distributed IPFS network.
> >
> > This protocol support (ipfs and ipns) therefore translates
> > ipfs:// and ipns:// to a http:// url. This resulting url is
> > then handled by the http protocol. It could also be https
> > depending on the gateway provided.
> >
> > To use this protocol, a gateway must be provided.
> > If you do nothing it will try to find it in your
> > $HOME/.ipfs/gateway file. The ways to set it manually are:
> > 1. Define a -gateway <url> to the gateway.
> > 2. Define $IPFS_GATEWAY with the full http link to the gateway.
> > 3. Define $IPFS_PATH and point it to the IPFS data path.
> > 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
> >
> > Signed-off-by: Mark Gaiser <markg85@gmail.com>
> > ---
> >  configure                 |   2 +
> >  doc/protocols.texi        |  30 ++++
> >  libavformat/Makefile      |   2 +
> >  libavformat/ipfsgateway.c | 309 ++++++++++++++++++++++++++++++++++++++
> >  libavformat/protocols.c   |   2 +
> >  5 files changed, 345 insertions(+)
> >  create mode 100644 libavformat/ipfsgateway.c
> >
> > diff --git a/configure b/configure
> > index e4d36aa639..55af90957a 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
> >  udplite_protocol_select="network"
> >  unix_protocol_deps="sys_un_h"
> >  unix_protocol_select="network"
> > +ipfs_protocol_select="https_protocol"
> > +ipns_protocol_select="https_protocol"
> >
> >  # external library protocols
> >  libamqp_protocol_deps="librabbitmq"
> > diff --git a/doc/protocols.texi b/doc/protocols.texi
> > index d207df0b52..7c9c0a4808 100644
> > --- a/doc/protocols.texi
> > +++ b/doc/protocols.texi
> > @@ -2025,5 +2025,35 @@ decoding errors.
> >
> >  @end table
> >
> > +@section ipfs
> > +
> > +InterPlanetary File System (IPFS) protocol support. One can access
> files stored
> > +on the IPFS network through so called gateways. Those are http(s)
> endpoints.
> > +This protocol wraps the IPFS native protocols (ipfs:// and ipns://) to
> be send
> > +to such a gateway. Users can (and should) host their own node which
> means this
> > +protocol will use your local machine gateway to access files on the
> IPFS network.
> > +
> > +If a user doesn't have a node of their own then the public gateway
> dweb.link is
> > +used by default.
> > +
> > +You can use this protocol in 2 ways. Using IPFS:
> > +@example
> > +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> > +@end example
> > +
> > +Or the IPNS protocol (IPNS is mutable IPFS):
> > +@example
> > +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> > +@end example
> > +
> > +You can also change the gateway to be used:
> > +
> > +@table @option
> > +
> > +@item gateway
> > +Defines the gateway to use. When nothing is provided the protocol will
> first try
> > +your local gateway. If that fails dweb.link will be used.
> > +
> > +@end table
> >
> >  @c man end PROTOCOLS
> > diff --git a/libavformat/Makefile b/libavformat/Makefile
> > index d7182d6bd8..e3233fd7ac 100644
> > --- a/libavformat/Makefile
> > +++ b/libavformat/Makefile
> > @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
> srtpproto.o srtp.o
> >  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
> >  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
> >  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
> > +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
> > +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
> >  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
> >  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
> >  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
> > diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
> > new file mode 100644
> > index 0000000000..1a039589c0
> > --- /dev/null
> > +++ b/libavformat/ipfsgateway.c
> > @@ -0,0 +1,309 @@
> > +/*
> > + * IPFS and IPNS protocol support through IPFS Gateway.
> > + * Copyright (c) 2022 Mark Gaiser
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#include "avformat.h"
> > +#include "libavutil/avassert.h"
>
> Unused.
>
> > +#include "libavutil/avstring.h"
> > +#include "libavutil/internal.h"
> > +#include "libavutil/opt.h"
> > +#include "libavutil/tree.h"
>
> ?
>

This whole include part can be cleaned up much more.
Just having these:
#include "libavutil/avstring.h"
#include "libavutil/opt.h"
#include "url.h"
#include <sys/stat.h>

is enough.


> > +#include <fcntl.h>
> > +#if HAVE_IO_H
> > +#include <io.h>
> > +#endif
> > +#if HAVE_UNISTD_H
> > +#include <unistd.h>
> > +#endif
> > +#include "os_support.h"
> > +#include "url.h"
> > +#include <stdlib.h>
> > +#include <sys/stat.h>
> > +
> > +typedef struct IPFSGatewayContext {
> > +    AVClass *class;
> > +    URLContext *inner;
> > +    // Is filled by the -gateway argument and not changed after.
> > +    char *gateway;
> > +    // If the above gateway is non null, it will be copied into this
> buffer.
> > +    // Else this buffer will contain the auto detected gateway.
> > +    // In either case, the gateway to use will be in this buffer.
> > +    char gateway_buffer[PATH_MAX];
> > +} IPFSGatewayContext;
> > +
> > +// A best-effort way to find the IPFS gateway.
> > +// Only the most appropiate gateway is set. It's not actually requested
> > +// (http call) to prevent a potential slowdown in startup. A potential
> timeout
> > +// is handled by the HTTP protocol.
> > +static int populate_ipfs_gateway(URLContext *h)
> > +{
> > +    IPFSGatewayContext *c = h->priv_data;
> > +    char ipfs_full_data_folder[PATH_MAX];
> > +    char ipfs_gateway_file[PATH_MAX];
> > +    struct stat st;
> > +    int stat_ret = 0;
> > +    int ret = AVERROR(EINVAL);
> > +    FILE *gateway_file = NULL;
> > +
> > +    // Test $IPFS_GATEWAY.
> > +    if (getenv("IPFS_GATEWAY") != NULL) {
> > +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
> > +                     getenv("IPFS_GATEWAY")) >=
> sizeof(c->gateway_buffer)) {
> > +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment
> variable exceeds the maximum length. We allow a max of %zu characters\n",
> sizeof(c->gateway_buffer));
> > +            ret = AVERROR(EINVAL);
> > +            goto err;
> > +        }
> > +
> > +        ret = 1;
> > +        goto err;
> > +    } else
> > +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
> > +
> > +    // We need to know the IPFS folder to - eventually - read the
> contents of
> > +    // the "gateway" file which would tell us the gateway to use.
> > +    if (getenv("IPFS_PATH") == NULL) {
> > +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
> > +
> > +        // Try via the home folder.
> > +        if (getenv("HOME") == NULL) {
> > +            av_log(h, AV_LOG_WARNING, "$HOME appears to be empty.\n");
> > +            ret = AVERROR(EINVAL);
> > +            goto err;
> > +        }
> > +
> > +        // Verify the composed path fits.
> > +        if (snprintf(ipfs_full_data_folder,
> sizeof(ipfs_full_data_folder),
> > +                     "%s/.ipfs/", getenv("HOME")) >=
> sizeof(ipfs_full_data_folder)) {
> > +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds the
> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
> > +            ret = AVERROR(EINVAL);
> > +            goto err;
> > +        }
> > +
> > +        // Stat the folder.
> > +        // It should exist in a default IPFS setup when run as local
> user.
> > +#ifndef _WIN32
> > +        stat_ret = stat(ipfs_full_data_folder, &st);
> > +#else
> > +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
> > +#endif
> > +        if (stat_ret < 0) {
> > +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
> tried:\n");
> > +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was empty.\n");
> > +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s) which
> doesn't exist.\n", ipfs_full_data_folder);
> > +            ret = AVERROR(ENOENT);
> > +            goto err;
> > +        }
> > +    } else {
> > +        if (snprintf(ipfs_full_data_folder,
> sizeof(ipfs_full_data_folder), "%s",
> > +                 getenv("IPFS_PATH")) >= sizeof(ipfs_full_data_folder))
> {
> > +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
> variable exceeds the maximum length. We allow a max of %zu characters\n",
> sizeof(c->gateway_buffer));
> > +            ret = AVERROR(EINVAL);
> > +            goto err;
> > +        }
> > +
> > +    }
> > +
> > +    // Copy the fully composed gateway path into ipfs_gateway_file.
> > +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
> "%sgateway",
> > +                 ipfs_full_data_folder) >= sizeof(ipfs_gateway_file)) {
> > +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path exceeds
> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
> > +        ret = AVERROR(ENOENT);
> > +        goto err;
> > +    }
> > +
> > +    // Get the contents of the gateway file.
> > +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
> > +    if (!gateway_file) {
> > +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri: %s)
> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
> > +        ret = AVERROR(ENOENT);
> > +        goto err;
> > +    }
> > +
> > +    // Read a single line (fgets stops at new line mark).
> > +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
> gateway_file);
> > +
> > +    // Replace the last char with \0
> > +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
> > +
> > +    // Replace first occurence of end of line with \0
> > +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
> > +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
>
> If the buffer contains both \r and \n and the first \n precedes the
> first \r, then the above zeroes both the first \r and the first \n. If
> it is enough to only zero the first newline, then this can be simplified
> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
>

It used to be in your suggested simplified approach.
It was then suggested to split it because of new line differences
on different platforms. This was supposed to catch "everything".

I prefer to keep this as-is now.


> > +
> > +    // If strlen finds anything longer then 0 characters then we have a
> > +    // potential gateway url.
> > +    if (strlen(c->gateway_buffer) < 1) {
>
> if (*c->gateway_buffer == '\0')
>

Is that a style difference or an actual behavior difference?


>
> > +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri: %s)
> appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
> > +        ret = AVERROR(EILSEQ);
> > +        goto err;
> > +    } else {
> > +        // We're done, the c->gateway_buffer has something that looks
> valid.
> > +        ret = 1;
> > +        goto err;
> > +    }
> > +
> > +err:
> > +    if (gateway_file)
> > +        fclose(gateway_file);
> > +
> > +    return ret;
> > +}
> > +
> > +static int translate_ipfs_to_http(URLContext *h, const char *uri,
> > +                                  int flags, AVDictionary **options)
> > +{
> > +    const char *ipfs_cid;
> > +    char *fulluri = NULL;
> > +    int ret;
> > +    IPFSGatewayContext *c = h->priv_data;
> > +
> > +    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is
> stripped from
> > +    // the string leaving just the CID in ipfs_cid.
> > +    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
> > +    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
> > +
> > +    // We must have either ipns or ipfs.
> > +    if (!is_ipfs && !is_ipns) {
> > +        ret = AVERROR(EINVAL);
> > +        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
> > +        goto err;
> > +    }
> > +
> > +    // If the CID has a length greater then 0 then we assume we have a
> proper working one.
> > +    // It could still be wrong but in that case the gateway should save
> us and
> > +    // ruturn a 403 error. The http protocol handles this.
> > +    if (strlen(ipfs_cid) < 1) {
> > +        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
> > +        ret = AVERROR(EILSEQ);
> > +        goto err;
> > +    }
> > +
> > +    // Populate c->gateway_buffer with whatever is in c->gateway
> > +    if (c->gateway != NULL) {
> > +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
> > +                     c->gateway) >= sizeof(c->gateway_buffer)) {
> > +            av_log(h, AV_LOG_WARNING, "The -gateway parameter is too
> long. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
>
> We typically use SIZE_SPECIFIER instead of z for compatibility with
> ancient versions of MSVC.
> (I don't know whether there is any supported version of MSVC that
> doesn't support z; I don't use MSVC myself.)
>

Here too I was explicitly suggested to use %zu (when I was using - I think
- %lu before).
So I assume that the ancient MSVC version you're referring to is probably
not supported anymore from an ffmpeg compiler requirement point of view?

Should i now change all "%zu" to "%"SIZE_SPECIFIER (this does not make it
neater nor shorter with the 80 char line limit).
Is this change required?

If it is, what _exactly_ do i need to change it in? I see a
couple different SIZE_SPECIFIER prefixes. I have no clue what to use here.


> > +            ret = AVERROR(EINVAL);
> > +            goto err;
> > +        }
> > +    } else {
> > +        // Populate the IPFS gateway if we have any.
> > +        // If not, inform the user how to properly set one.
> > +        ret = populate_ipfs_gateway(h);
> > +
> > +        if (ret < 1) {
> > +            // We fallback on dweb.link (managed by Protocol Labs).
> > +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "
> https://dweb.link");
> > +
> > +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
> running. You’re now using the public gateway at dweb.link.\n");
> > +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
> recommended to improve performance and reliability, and not share all your
> activity with a single IPFS gateway.\n");
> > +            av_log(h, AV_LOG_INFO, "There are multiple options to
> define this gateway.\n");
> > +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
> param, without a trailing slash: -gateway <url>.\n");
> > +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
> environment variable with the full HTTP URL to the gateway without trailing
> forward slash.\n");
> > +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH environment
> variable and point it to the IPFS data path - this is typically ~/.ipfs\n");
>
> All those AV_LOG_INFO can be combined which has the advantage that the
> logs can't be teared apart (which they can now if something else logs at
> the same time); furthermore, this would also decrease codesize.
>

Do you have an example of where that's happening?

>
> > +        }
> > +    }
> > +
> > +    // Test if the gateway starts with either http:// or https://
> > +    if (av_stristart(c->gateway_buffer, "http://", NULL) == 0
> > +        && av_stristart(c->gateway_buffer, "https://", NULL) == 0) {
> > +        av_log(h, AV_LOG_WARNING, "The gateway URL didn't start with
> http:// or https:// and is therefore invalid.\n");
> > +        ret = AVERROR(EILSEQ);
> > +        goto err;
> > +    }
> > +
> > +    // Concatenate the url.
> > +    // This ends up with something like:
> http://localhost:8080/ipfs/Qm.....
> > +    // The format of "%s%s%s%s" is the following:
> > +    // 1st %s = The gateway.
> > +    // 2nd %s = If the gateway didn't end in a slash, add a "/".
> Otherwise it's an empty string
> > +    // 3rd %s = Either ipns/ or ipfs/.
> > +    // 4th %s = The IPFS CID (Qm..., bafy..., ...).
> > +    fulluri = av_asprintf("%s%s%s%s",
> > +                          c->gateway_buffer,
> > +                          (c->gateway_buffer[strlen(c->gateway_buffer)
> - 1] == '/') ? "" : "/",
> > +                          (is_ipns) ? "ipns/" : "ipfs/",
> > +                          ipfs_cid);
>
> Missing allocation check.
>

Ah
Fixed it locally.


>
> > +
> > +    // Pass the URL back to FFMpeg's protocol handler.
> > +    if ((ret = ffurl_open_whitelist(&c->inner, fulluri, flags,
> > +                                    &h->interrupt_callback, options,
> > +                                    h->protocol_whitelist,
> > +                                    h->protocol_blacklist, h))
> > +        < 0) {
>
> Weird formatting; why don't you just use
> ret = ffurl_open_whitelist(...);
> if (ret < 0) {
>

Fixed.


>
> > +        av_log(h, AV_LOG_WARNING, "Unable to open resource: %s\n",
> fulluri);
> > +        goto err;
> > +    }
> > +
> > +err:
> > +    av_free(fulluri);
> > +    return ret;
> > +}
> > +
> > +static int ipfs_read(URLContext *h, unsigned char *buf, int size)
> > +{
> > +    IPFSGatewayContext *c = h->priv_data;
> > +    return ffurl_read(c->inner, buf, size);
> > +}
> > +
> > +static int64_t ipfs_seek(URLContext *h, int64_t pos, int whence)
> > +{
> > +    IPFSGatewayContext *c = h->priv_data;
> > +    return ffurl_seek(c->inner, pos, whence);
> > +}
> > +
> > +static int ipfs_close(URLContext *h)
> > +{
> > +    IPFSGatewayContext *c = h->priv_data;
> > +    return ffurl_closep(&c->inner);
> > +}
> > +
> > +#define OFFSET(x) offsetof(IPFSGatewayContext, x)
> > +
> > +static const AVOption options[] = {
> > +    {"gateway", "The gateway to ask for IPFS data.", OFFSET(gateway),
> AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM},
> > +    {NULL},
> > +};
> > +
> > +static const AVClass ipfs_context_class = {
> > +    .class_name   = "IPFS",
> > +    .item_name    = av_default_item_name,
> > +    .option       = options,
> > +    .version      = LIBAVUTIL_VERSION_INT,
> > +};
> > +
> > +const URLProtocol ff_ipfs_protocol = {
> > +    .name             = "ipfs",
> > +    .url_open2        = translate_ipfs_to_http,
> > +    .url_read         = ipfs_read,
> > +    .url_seek         = ipfs_seek,
> > +    .url_close        = ipfs_close,
> > +    .priv_data_size   = sizeof(IPFSGatewayContext),
> > +    .priv_data_class  = &ipfs_context_class,
> > +};
> > +
> > +const URLProtocol ff_ipns_protocol = {
> > +    .name             = "ipns",
> > +    .url_open2        = translate_ipfs_to_http,
> > +    .url_read         = ipfs_read,
> > +    .url_seek         = ipfs_seek,
> > +    .url_close        = ipfs_close,
> > +    .priv_data_size   = sizeof(IPFSGatewayContext),
> > +    .priv_data_class  = &ipfs_context_class,
> > +};
> > diff --git a/libavformat/protocols.c b/libavformat/protocols.c
> > index d07563cd0c..6ee62a598a 100644
> > --- a/libavformat/protocols.c
> > +++ b/libavformat/protocols.c
> > @@ -71,6 +71,8 @@ extern const URLProtocol ff_libsrt_protocol;
> >  extern const URLProtocol ff_libssh_protocol;
> >  extern const URLProtocol ff_libsmbclient_protocol;
> >  extern const URLProtocol ff_libzmq_protocol;
> > +extern const URLProtocol ff_ipfs_protocol;
> > +extern const URLProtocol ff_ipns_protocol;
> >
> >  #include "libavformat/protocol_list.c"
> >
>
>
Thank you very much for your review!
Please do hit me back with a reply on the questions I have still open.

I'm honestly quite done with patching this over and over again (it's open
for months now) so I'd like to put the pace in these fixes and send an
updated version today.
Andreas Rheinhardt March 30, 2022, 1:57 p.m. UTC | #3
Mark Gaiser:
> On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com> wrote:
> 
>> Mark Gaiser:
>>> This patch adds support for:
>>> - ffplay ipfs://<cid>
>>> - ffplay ipns://<cid>
>>>
>>> IPFS data can be played from so called "ipfs gateways".
>>> A gateway is essentially a webserver that gives access to the
>>> distributed IPFS network.
>>>
>>> This protocol support (ipfs and ipns) therefore translates
>>> ipfs:// and ipns:// to a http:// url. This resulting url is
>>> then handled by the http protocol. It could also be https
>>> depending on the gateway provided.
>>>
>>> To use this protocol, a gateway must be provided.
>>> If you do nothing it will try to find it in your
>>> $HOME/.ipfs/gateway file. The ways to set it manually are:
>>> 1. Define a -gateway <url> to the gateway.
>>> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
>>> 3. Define $IPFS_PATH and point it to the IPFS data path.
>>> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
>>>
>>> Signed-off-by: Mark Gaiser <markg85@gmail.com>
>>> ---
>>>  configure                 |   2 +
>>>  doc/protocols.texi        |  30 ++++
>>>  libavformat/Makefile      |   2 +
>>>  libavformat/ipfsgateway.c | 309 ++++++++++++++++++++++++++++++++++++++
>>>  libavformat/protocols.c   |   2 +
>>>  5 files changed, 345 insertions(+)
>>>  create mode 100644 libavformat/ipfsgateway.c
>>>
>>> diff --git a/configure b/configure
>>> index e4d36aa639..55af90957a 100755
>>> --- a/configure
>>> +++ b/configure
>>> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
>>>  udplite_protocol_select="network"
>>>  unix_protocol_deps="sys_un_h"
>>>  unix_protocol_select="network"
>>> +ipfs_protocol_select="https_protocol"
>>> +ipns_protocol_select="https_protocol"
>>>
>>>  # external library protocols
>>>  libamqp_protocol_deps="librabbitmq"
>>> diff --git a/doc/protocols.texi b/doc/protocols.texi
>>> index d207df0b52..7c9c0a4808 100644
>>> --- a/doc/protocols.texi
>>> +++ b/doc/protocols.texi
>>> @@ -2025,5 +2025,35 @@ decoding errors.
>>>
>>>  @end table
>>>
>>> +@section ipfs
>>> +
>>> +InterPlanetary File System (IPFS) protocol support. One can access
>> files stored
>>> +on the IPFS network through so called gateways. Those are http(s)
>> endpoints.
>>> +This protocol wraps the IPFS native protocols (ipfs:// and ipns://) to
>> be send
>>> +to such a gateway. Users can (and should) host their own node which
>> means this
>>> +protocol will use your local machine gateway to access files on the
>> IPFS network.
>>> +
>>> +If a user doesn't have a node of their own then the public gateway
>> dweb.link is
>>> +used by default.
>>> +
>>> +You can use this protocol in 2 ways. Using IPFS:
>>> +@example
>>> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>>> +@end example
>>> +
>>> +Or the IPNS protocol (IPNS is mutable IPFS):
>>> +@example
>>> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>>> +@end example
>>> +
>>> +You can also change the gateway to be used:
>>> +
>>> +@table @option
>>> +
>>> +@item gateway
>>> +Defines the gateway to use. When nothing is provided the protocol will
>> first try
>>> +your local gateway. If that fails dweb.link will be used.
>>> +
>>> +@end table
>>>
>>>  @c man end PROTOCOLS
>>> diff --git a/libavformat/Makefile b/libavformat/Makefile
>>> index d7182d6bd8..e3233fd7ac 100644
>>> --- a/libavformat/Makefile
>>> +++ b/libavformat/Makefile
>>> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
>> srtpproto.o srtp.o
>>>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
>>>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
>>>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
>>> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
>>> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
>>>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
>>>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
>>>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
>>> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
>>> new file mode 100644
>>> index 0000000000..1a039589c0
>>> --- /dev/null
>>> +++ b/libavformat/ipfsgateway.c
>>> @@ -0,0 +1,309 @@
>>> +/*
>>> + * IPFS and IPNS protocol support through IPFS Gateway.
>>> + * Copyright (c) 2022 Mark Gaiser
>>> + *
>>> + * This file is part of FFmpeg.
>>> + *
>>> + * FFmpeg is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public
>>> + * License as published by the Free Software Foundation; either
>>> + * version 2.1 of the License, or (at your option) any later version.
>>> + *
>>> + * FFmpeg is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with FFmpeg; if not, write to the Free Software
>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> 02110-1301 USA
>>> + */
>>> +
>>> +#include "avformat.h"
>>> +#include "libavutil/avassert.h"
>>
>> Unused.
>>
>>> +#include "libavutil/avstring.h"
>>> +#include "libavutil/internal.h"
>>> +#include "libavutil/opt.h"
>>> +#include "libavutil/tree.h"
>>
>> ?
>>
> 
> This whole include part can be cleaned up much more.
> Just having these:
> #include "libavutil/avstring.h"
> #include "libavutil/opt.h"
> #include "url.h"
> #include <sys/stat.h>
> 
> is enough.
> 
> 
>>> +#include <fcntl.h>
>>> +#if HAVE_IO_H
>>> +#include <io.h>
>>> +#endif
>>> +#if HAVE_UNISTD_H
>>> +#include <unistd.h>
>>> +#endif
>>> +#include "os_support.h"
>>> +#include "url.h"
>>> +#include <stdlib.h>
>>> +#include <sys/stat.h>
>>> +
>>> +typedef struct IPFSGatewayContext {
>>> +    AVClass *class;
>>> +    URLContext *inner;
>>> +    // Is filled by the -gateway argument and not changed after.
>>> +    char *gateway;
>>> +    // If the above gateway is non null, it will be copied into this
>> buffer.
>>> +    // Else this buffer will contain the auto detected gateway.
>>> +    // In either case, the gateway to use will be in this buffer.
>>> +    char gateway_buffer[PATH_MAX];
>>> +} IPFSGatewayContext;
>>> +
>>> +// A best-effort way to find the IPFS gateway.
>>> +// Only the most appropiate gateway is set. It's not actually requested
>>> +// (http call) to prevent a potential slowdown in startup. A potential
>> timeout
>>> +// is handled by the HTTP protocol.
>>> +static int populate_ipfs_gateway(URLContext *h)
>>> +{
>>> +    IPFSGatewayContext *c = h->priv_data;
>>> +    char ipfs_full_data_folder[PATH_MAX];
>>> +    char ipfs_gateway_file[PATH_MAX];
>>> +    struct stat st;
>>> +    int stat_ret = 0;
>>> +    int ret = AVERROR(EINVAL);
>>> +    FILE *gateway_file = NULL;
>>> +
>>> +    // Test $IPFS_GATEWAY.
>>> +    if (getenv("IPFS_GATEWAY") != NULL) {
>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
>>> +                     getenv("IPFS_GATEWAY")) >=
>> sizeof(c->gateway_buffer)) {
>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment
>> variable exceeds the maximum length. We allow a max of %zu characters\n",
>> sizeof(c->gateway_buffer));
>>> +            ret = AVERROR(EINVAL);
>>> +            goto err;
>>> +        }
>>> +
>>> +        ret = 1;
>>> +        goto err;
>>> +    } else
>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
>>> +
>>> +    // We need to know the IPFS folder to - eventually - read the
>> contents of
>>> +    // the "gateway" file which would tell us the gateway to use.
>>> +    if (getenv("IPFS_PATH") == NULL) {
>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
>>> +
>>> +        // Try via the home folder.
>>> +        if (getenv("HOME") == NULL) {
>>> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be empty.\n");
>>> +            ret = AVERROR(EINVAL);
>>> +            goto err;
>>> +        }
>>> +
>>> +        // Verify the composed path fits.
>>> +        if (snprintf(ipfs_full_data_folder,
>> sizeof(ipfs_full_data_folder),
>>> +                     "%s/.ipfs/", getenv("HOME")) >=
>> sizeof(ipfs_full_data_folder)) {
>>> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds the
>> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
>>> +            ret = AVERROR(EINVAL);
>>> +            goto err;
>>> +        }
>>> +
>>> +        // Stat the folder.
>>> +        // It should exist in a default IPFS setup when run as local
>> user.
>>> +#ifndef _WIN32
>>> +        stat_ret = stat(ipfs_full_data_folder, &st);
>>> +#else
>>> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
>>> +#endif
>>> +        if (stat_ret < 0) {
>>> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
>> tried:\n");
>>> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was empty.\n");
>>> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s) which
>> doesn't exist.\n", ipfs_full_data_folder);
>>> +            ret = AVERROR(ENOENT);
>>> +            goto err;
>>> +        }
>>> +    } else {
>>> +        if (snprintf(ipfs_full_data_folder,
>> sizeof(ipfs_full_data_folder), "%s",
>>> +                 getenv("IPFS_PATH")) >= sizeof(ipfs_full_data_folder))
>> {
>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
>> variable exceeds the maximum length. We allow a max of %zu characters\n",
>> sizeof(c->gateway_buffer));
>>> +            ret = AVERROR(EINVAL);
>>> +            goto err;
>>> +        }
>>> +
>>> +    }
>>> +
>>> +    // Copy the fully composed gateway path into ipfs_gateway_file.
>>> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
>> "%sgateway",
>>> +                 ipfs_full_data_folder) >= sizeof(ipfs_gateway_file)) {
>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path exceeds
>> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
>>> +        ret = AVERROR(ENOENT);
>>> +        goto err;
>>> +    }
>>> +
>>> +    // Get the contents of the gateway file.
>>> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
>>> +    if (!gateway_file) {
>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri: %s)
>> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
>>> +        ret = AVERROR(ENOENT);
>>> +        goto err;
>>> +    }
>>> +
>>> +    // Read a single line (fgets stops at new line mark).
>>> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
>> gateway_file);
>>> +
>>> +    // Replace the last char with \0
>>> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
>>> +
>>> +    // Replace first occurence of end of line with \0
>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
>>
>> If the buffer contains both \r and \n and the first \n precedes the
>> first \r, then the above zeroes both the first \r and the first \n. If
>> it is enough to only zero the first newline, then this can be simplified
>> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
>>
> 
> It used to be in your suggested simplified approach.
> It was then suggested to split it because of new line differences
> on different platforms. This was supposed to catch "everything".
> 
> I prefer to keep this as-is now.
> 

Is it intended to use what is between the first \n and the first \r
lateron although this data will be after the '\0' after this? That would
be very weird.

> 
>>> +
>>> +    // If strlen finds anything longer then 0 characters then we have a
>>> +    // potential gateway url.
>>> +    if (strlen(c->gateway_buffer) < 1) {
>>
>> if (*c->gateway_buffer == '\0')
>>
> 
> Is that a style difference or an actual behavior difference?
> 

The former would call strlen to check whether a string is empty (unless
the compiler optimizes it away). No actual behaviour difference exists.
But it is nevertheless more than a style difference.

> 
>>
>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri: %s)
>> appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
>>> +        ret = AVERROR(EILSEQ);
>>> +        goto err;
>>> +    } else {
>>> +        // We're done, the c->gateway_buffer has something that looks
>> valid.
>>> +        ret = 1;
>>> +        goto err;
>>> +    }
>>> +
>>> +err:
>>> +    if (gateway_file)
>>> +        fclose(gateway_file);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static int translate_ipfs_to_http(URLContext *h, const char *uri,
>>> +                                  int flags, AVDictionary **options)
>>> +{
>>> +    const char *ipfs_cid;
>>> +    char *fulluri = NULL;
>>> +    int ret;
>>> +    IPFSGatewayContext *c = h->priv_data;
>>> +
>>> +    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is
>> stripped from
>>> +    // the string leaving just the CID in ipfs_cid.
>>> +    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
>>> +    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
>>> +
>>> +    // We must have either ipns or ipfs.
>>> +    if (!is_ipfs && !is_ipns) {
>>> +        ret = AVERROR(EINVAL);
>>> +        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
>>> +        goto err;
>>> +    }
>>> +
>>> +    // If the CID has a length greater then 0 then we assume we have a
>> proper working one.
>>> +    // It could still be wrong but in that case the gateway should save
>> us and
>>> +    // ruturn a 403 error. The http protocol handles this.
>>> +    if (strlen(ipfs_cid) < 1) {
>>> +        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
>>> +        ret = AVERROR(EILSEQ);
>>> +        goto err;
>>> +    }
>>> +
>>> +    // Populate c->gateway_buffer with whatever is in c->gateway
>>> +    if (c->gateway != NULL) {
>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
>>> +                     c->gateway) >= sizeof(c->gateway_buffer)) {
>>> +            av_log(h, AV_LOG_WARNING, "The -gateway parameter is too
>> long. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
>>
>> We typically use SIZE_SPECIFIER instead of z for compatibility with
>> ancient versions of MSVC.
>> (I don't know whether there is any supported version of MSVC that
>> doesn't support z; I don't use MSVC myself.)
>>
> 
> Here too I was explicitly suggested to use %zu (when I was using - I think
> - %lu before).
> So I assume that the ancient MSVC version you're referring to is probably
> not supported anymore from an ffmpeg compiler requirement point of view?
> 
> Should i now change all "%zu" to "%"SIZE_SPECIFIER (this does not make it
> neater nor shorter with the 80 char line limit).
> Is this change required?

As said: It is for compatibility with ancient versions of MSVC. But I
don't know whether any of the actually supported versions of MSVC still
need it.

> 
> If it is, what _exactly_ do i need to change it in? I see a
> couple different SIZE_SPECIFIER prefixes. I have no clue what to use here.

There is only one SIZE_SPECIFIER, namely SIZE_SPECIFIER; the
PTRDIFF_SPECIFIER (or whatever you see) is obviously not the thing to
use for size_t.

> 
> 
>>> +            ret = AVERROR(EINVAL);
>>> +            goto err;
>>> +        }
>>> +    } else {
>>> +        // Populate the IPFS gateway if we have any.
>>> +        // If not, inform the user how to properly set one.
>>> +        ret = populate_ipfs_gateway(h);
>>> +
>>> +        if (ret < 1) {
>>> +            // We fallback on dweb.link (managed by Protocol Labs).
>>> +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "
>> https://dweb.link");
>>> +
>>> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
>> running. You’re now using the public gateway at dweb.link.\n");
>>> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
>> recommended to improve performance and reliability, and not share all your
>> activity with a single IPFS gateway.\n");
>>> +            av_log(h, AV_LOG_INFO, "There are multiple options to
>> define this gateway.\n");
>>> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
>> param, without a trailing slash: -gateway <url>.\n");
>>> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
>> environment variable with the full HTTP URL to the gateway without trailing
>> forward slash.\n");
>>> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH environment
>> variable and point it to the IPFS data path - this is typically ~/.ipfs\n");
>>
>> All those AV_LOG_INFO can be combined which has the advantage that the
>> logs can't be teared apart (which they can now if something else logs at
>> the same time); furthermore, this would also decrease codesize.
>>
> 
> Do you have an example of where that's happening?
> 

It can happen any time you have multiple threads using av_log at the
same time. Given that your statements are supposed to be full lines, it
is not that bad if it happens here, but it is nevertheless suboptimal.

>>
>>> +        }
>>> +    }
>>> +
>>> +    // Test if the gateway starts with either http:// or https://
>>> +    if (av_stristart(c->gateway_buffer, "http://", NULL) == 0
>>> +        && av_stristart(c->gateway_buffer, "https://", NULL) == 0) {
>>> +        av_log(h, AV_LOG_WARNING, "The gateway URL didn't start with
>> http:// or https:// and is therefore invalid.\n");
>>> +        ret = AVERROR(EILSEQ);
>>> +        goto err;
>>> +    }
>>> +
>>> +    // Concatenate the url.
>>> +    // This ends up with something like:
>> http://localhost:8080/ipfs/Qm.....
>>> +    // The format of "%s%s%s%s" is the following:
>>> +    // 1st %s = The gateway.
>>> +    // 2nd %s = If the gateway didn't end in a slash, add a "/".
>> Otherwise it's an empty string
>>> +    // 3rd %s = Either ipns/ or ipfs/.
>>> +    // 4th %s = The IPFS CID (Qm..., bafy..., ...).
>>> +    fulluri = av_asprintf("%s%s%s%s",
>>> +                          c->gateway_buffer,
>>> +                          (c->gateway_buffer[strlen(c->gateway_buffer)
>> - 1] == '/') ? "" : "/",
>>> +                          (is_ipns) ? "ipns/" : "ipfs/",
>>> +                          ipfs_cid);
>>
>> Missing allocation check.
>>
> 
> Ah
> Fixed it locally.
> 
> 
>>
>>> +
>>> +    // Pass the URL back to FFMpeg's protocol handler.
>>> +    if ((ret = ffurl_open_whitelist(&c->inner, fulluri, flags,
>>> +                                    &h->interrupt_callback, options,
>>> +                                    h->protocol_whitelist,
>>> +                                    h->protocol_blacklist, h))
>>> +        < 0) {
>>
>> Weird formatting; why don't you just use
>> ret = ffurl_open_whitelist(...);
>> if (ret < 0) {
>>
> 
> Fixed.
> 
> 
>>
>>> +        av_log(h, AV_LOG_WARNING, "Unable to open resource: %s\n",
>> fulluri);
>>> +        goto err;
>>> +    }
>>> +
>>> +err:
>>> +    av_free(fulluri);
>>> +    return ret;
>>> +}
>>> +
>>> +static int ipfs_read(URLContext *h, unsigned char *buf, int size)
>>> +{
>>> +    IPFSGatewayContext *c = h->priv_data;
>>> +    return ffurl_read(c->inner, buf, size);
>>> +}
>>> +
>>> +static int64_t ipfs_seek(URLContext *h, int64_t pos, int whence)
>>> +{
>>> +    IPFSGatewayContext *c = h->priv_data;
>>> +    return ffurl_seek(c->inner, pos, whence);
>>> +}
>>> +
>>> +static int ipfs_close(URLContext *h)
>>> +{
>>> +    IPFSGatewayContext *c = h->priv_data;
>>> +    return ffurl_closep(&c->inner);
>>> +}
>>> +
>>> +#define OFFSET(x) offsetof(IPFSGatewayContext, x)
>>> +
>>> +static const AVOption options[] = {
>>> +    {"gateway", "The gateway to ask for IPFS data.", OFFSET(gateway),
>> AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM},
>>> +    {NULL},
>>> +};
>>> +
>>> +static const AVClass ipfs_context_class = {
>>> +    .class_name   = "IPFS",
>>> +    .item_name    = av_default_item_name,
>>> +    .option       = options,
>>> +    .version      = LIBAVUTIL_VERSION_INT,
>>> +};
>>> +
>>> +const URLProtocol ff_ipfs_protocol = {
>>> +    .name             = "ipfs",
>>> +    .url_open2        = translate_ipfs_to_http,
>>> +    .url_read         = ipfs_read,
>>> +    .url_seek         = ipfs_seek,
>>> +    .url_close        = ipfs_close,
>>> +    .priv_data_size   = sizeof(IPFSGatewayContext),
>>> +    .priv_data_class  = &ipfs_context_class,
>>> +};
>>> +
>>> +const URLProtocol ff_ipns_protocol = {
>>> +    .name             = "ipns",
>>> +    .url_open2        = translate_ipfs_to_http,
>>> +    .url_read         = ipfs_read,
>>> +    .url_seek         = ipfs_seek,
>>> +    .url_close        = ipfs_close,
>>> +    .priv_data_size   = sizeof(IPFSGatewayContext),
>>> +    .priv_data_class  = &ipfs_context_class,
>>> +};
>>> diff --git a/libavformat/protocols.c b/libavformat/protocols.c
>>> index d07563cd0c..6ee62a598a 100644
>>> --- a/libavformat/protocols.c
>>> +++ b/libavformat/protocols.c
>>> @@ -71,6 +71,8 @@ extern const URLProtocol ff_libsrt_protocol;
>>>  extern const URLProtocol ff_libssh_protocol;
>>>  extern const URLProtocol ff_libsmbclient_protocol;
>>>  extern const URLProtocol ff_libzmq_protocol;
>>> +extern const URLProtocol ff_ipfs_protocol;
>>> +extern const URLProtocol ff_ipns_protocol;
>>>
>>>  #include "libavformat/protocol_list.c"
>>>
>>
>>
> Thank you very much for your review!
> Please do hit me back with a reply on the questions I have still open.
> 
> I'm honestly quite done with patching this over and over again (it's open
> for months now) so I'd like to put the pace in these fixes and send an
> updated version today.
>
Mark Gaiser March 30, 2022, 3:16 p.m. UTC | #4
On Wed, Mar 30, 2022 at 3:57 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Mark Gaiser:
> > On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
> > andreas.rheinhardt@outlook.com> wrote:
> >
> >> Mark Gaiser:
> >>> This patch adds support for:
> >>> - ffplay ipfs://<cid>
> >>> - ffplay ipns://<cid>
> >>>
> >>> IPFS data can be played from so called "ipfs gateways".
> >>> A gateway is essentially a webserver that gives access to the
> >>> distributed IPFS network.
> >>>
> >>> This protocol support (ipfs and ipns) therefore translates
> >>> ipfs:// and ipns:// to a http:// url. This resulting url is
> >>> then handled by the http protocol. It could also be https
> >>> depending on the gateway provided.
> >>>
> >>> To use this protocol, a gateway must be provided.
> >>> If you do nothing it will try to find it in your
> >>> $HOME/.ipfs/gateway file. The ways to set it manually are:
> >>> 1. Define a -gateway <url> to the gateway.
> >>> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
> >>> 3. Define $IPFS_PATH and point it to the IPFS data path.
> >>> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
> >>>
> >>> Signed-off-by: Mark Gaiser <markg85@gmail.com>
> >>> ---
> >>>  configure                 |   2 +
> >>>  doc/protocols.texi        |  30 ++++
> >>>  libavformat/Makefile      |   2 +
> >>>  libavformat/ipfsgateway.c | 309 ++++++++++++++++++++++++++++++++++++++
> >>>  libavformat/protocols.c   |   2 +
> >>>  5 files changed, 345 insertions(+)
> >>>  create mode 100644 libavformat/ipfsgateway.c
> >>>
> >>> diff --git a/configure b/configure
> >>> index e4d36aa639..55af90957a 100755
> >>> --- a/configure
> >>> +++ b/configure
> >>> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
> >>>  udplite_protocol_select="network"
> >>>  unix_protocol_deps="sys_un_h"
> >>>  unix_protocol_select="network"
> >>> +ipfs_protocol_select="https_protocol"
> >>> +ipns_protocol_select="https_protocol"
> >>>
> >>>  # external library protocols
> >>>  libamqp_protocol_deps="librabbitmq"
> >>> diff --git a/doc/protocols.texi b/doc/protocols.texi
> >>> index d207df0b52..7c9c0a4808 100644
> >>> --- a/doc/protocols.texi
> >>> +++ b/doc/protocols.texi
> >>> @@ -2025,5 +2025,35 @@ decoding errors.
> >>>
> >>>  @end table
> >>>
> >>> +@section ipfs
> >>> +
> >>> +InterPlanetary File System (IPFS) protocol support. One can access
> >> files stored
> >>> +on the IPFS network through so called gateways. Those are http(s)
> >> endpoints.
> >>> +This protocol wraps the IPFS native protocols (ipfs:// and ipns://) to
> >> be send
> >>> +to such a gateway. Users can (and should) host their own node which
> >> means this
> >>> +protocol will use your local machine gateway to access files on the
> >> IPFS network.
> >>> +
> >>> +If a user doesn't have a node of their own then the public gateway
> >> dweb.link is
> >>> +used by default.
> >>> +
> >>> +You can use this protocol in 2 ways. Using IPFS:
> >>> +@example
> >>> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> >>> +@end example
> >>> +
> >>> +Or the IPNS protocol (IPNS is mutable IPFS):
> >>> +@example
> >>> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> >>> +@end example
> >>> +
> >>> +You can also change the gateway to be used:
> >>> +
> >>> +@table @option
> >>> +
> >>> +@item gateway
> >>> +Defines the gateway to use. When nothing is provided the protocol will
> >> first try
> >>> +your local gateway. If that fails dweb.link will be used.
> >>> +
> >>> +@end table
> >>>
> >>>  @c man end PROTOCOLS
> >>> diff --git a/libavformat/Makefile b/libavformat/Makefile
> >>> index d7182d6bd8..e3233fd7ac 100644
> >>> --- a/libavformat/Makefile
> >>> +++ b/libavformat/Makefile
> >>> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
> >> srtpproto.o srtp.o
> >>>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
> >>>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
> >>>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
> >>> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
> >>> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
> >>>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
> >>>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
> >>>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
> >>> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
> >>> new file mode 100644
> >>> index 0000000000..1a039589c0
> >>> --- /dev/null
> >>> +++ b/libavformat/ipfsgateway.c
> >>> @@ -0,0 +1,309 @@
> >>> +/*
> >>> + * IPFS and IPNS protocol support through IPFS Gateway.
> >>> + * Copyright (c) 2022 Mark Gaiser
> >>> + *
> >>> + * This file is part of FFmpeg.
> >>> + *
> >>> + * FFmpeg is free software; you can redistribute it and/or
> >>> + * modify it under the terms of the GNU Lesser General Public
> >>> + * License as published by the Free Software Foundation; either
> >>> + * version 2.1 of the License, or (at your option) any later version.
> >>> + *
> >>> + * FFmpeg is distributed in the hope that it will be useful,
> >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>> + * Lesser General Public License for more details.
> >>> + *
> >>> + * You should have received a copy of the GNU Lesser General Public
> >>> + * License along with FFmpeg; if not, write to the Free Software
> >>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> >> 02110-1301 USA
> >>> + */
> >>> +
> >>> +#include "avformat.h"
> >>> +#include "libavutil/avassert.h"
> >>
> >> Unused.
> >>
> >>> +#include "libavutil/avstring.h"
> >>> +#include "libavutil/internal.h"
> >>> +#include "libavutil/opt.h"
> >>> +#include "libavutil/tree.h"
> >>
> >> ?
> >>
> >
> > This whole include part can be cleaned up much more.
> > Just having these:
> > #include "libavutil/avstring.h"
> > #include "libavutil/opt.h"
> > #include "url.h"
> > #include <sys/stat.h>
> >
> > is enough.
> >
> >
> >>> +#include <fcntl.h>
> >>> +#if HAVE_IO_H
> >>> +#include <io.h>
> >>> +#endif
> >>> +#if HAVE_UNISTD_H
> >>> +#include <unistd.h>
> >>> +#endif
> >>> +#include "os_support.h"
> >>> +#include "url.h"
> >>> +#include <stdlib.h>
> >>> +#include <sys/stat.h>
> >>> +
> >>> +typedef struct IPFSGatewayContext {
> >>> +    AVClass *class;
> >>> +    URLContext *inner;
> >>> +    // Is filled by the -gateway argument and not changed after.
> >>> +    char *gateway;
> >>> +    // If the above gateway is non null, it will be copied into this
> >> buffer.
> >>> +    // Else this buffer will contain the auto detected gateway.
> >>> +    // In either case, the gateway to use will be in this buffer.
> >>> +    char gateway_buffer[PATH_MAX];
> >>> +} IPFSGatewayContext;
> >>> +
> >>> +// A best-effort way to find the IPFS gateway.
> >>> +// Only the most appropiate gateway is set. It's not actually
> requested
> >>> +// (http call) to prevent a potential slowdown in startup. A potential
> >> timeout
> >>> +// is handled by the HTTP protocol.
> >>> +static int populate_ipfs_gateway(URLContext *h)
> >>> +{
> >>> +    IPFSGatewayContext *c = h->priv_data;
> >>> +    char ipfs_full_data_folder[PATH_MAX];
> >>> +    char ipfs_gateway_file[PATH_MAX];
> >>> +    struct stat st;
> >>> +    int stat_ret = 0;
> >>> +    int ret = AVERROR(EINVAL);
> >>> +    FILE *gateway_file = NULL;
> >>> +
> >>> +    // Test $IPFS_GATEWAY.
> >>> +    if (getenv("IPFS_GATEWAY") != NULL) {
> >>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
> "%s",
> >>> +                     getenv("IPFS_GATEWAY")) >=
> >> sizeof(c->gateway_buffer)) {
> >>> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment
> >> variable exceeds the maximum length. We allow a max of %zu
> characters\n",
> >> sizeof(c->gateway_buffer));
> >>> +            ret = AVERROR(EINVAL);
> >>> +            goto err;
> >>> +        }
> >>> +
> >>> +        ret = 1;
> >>> +        goto err;
> >>> +    } else
> >>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
> >>> +
> >>> +    // We need to know the IPFS folder to - eventually - read the
> >> contents of
> >>> +    // the "gateway" file which would tell us the gateway to use.
> >>> +    if (getenv("IPFS_PATH") == NULL) {
> >>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
> >>> +
> >>> +        // Try via the home folder.
> >>> +        if (getenv("HOME") == NULL) {
> >>> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be empty.\n");
> >>> +            ret = AVERROR(EINVAL);
> >>> +            goto err;
> >>> +        }
> >>> +
> >>> +        // Verify the composed path fits.
> >>> +        if (snprintf(ipfs_full_data_folder,
> >> sizeof(ipfs_full_data_folder),
> >>> +                     "%s/.ipfs/", getenv("HOME")) >=
> >> sizeof(ipfs_full_data_folder)) {
> >>> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds the
> >> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
> >>> +            ret = AVERROR(EINVAL);
> >>> +            goto err;
> >>> +        }
> >>> +
> >>> +        // Stat the folder.
> >>> +        // It should exist in a default IPFS setup when run as local
> >> user.
> >>> +#ifndef _WIN32
> >>> +        stat_ret = stat(ipfs_full_data_folder, &st);
> >>> +#else
> >>> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
> >>> +#endif
> >>> +        if (stat_ret < 0) {
> >>> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
> >> tried:\n");
> >>> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was
> empty.\n");
> >>> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s) which
> >> doesn't exist.\n", ipfs_full_data_folder);
> >>> +            ret = AVERROR(ENOENT);
> >>> +            goto err;
> >>> +        }
> >>> +    } else {
> >>> +        if (snprintf(ipfs_full_data_folder,
> >> sizeof(ipfs_full_data_folder), "%s",
> >>> +                 getenv("IPFS_PATH")) >=
> sizeof(ipfs_full_data_folder))
> >> {
> >>> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
> >> variable exceeds the maximum length. We allow a max of %zu
> characters\n",
> >> sizeof(c->gateway_buffer));
> >>> +            ret = AVERROR(EINVAL);
> >>> +            goto err;
> >>> +        }
> >>> +
> >>> +    }
> >>> +
> >>> +    // Copy the fully composed gateway path into ipfs_gateway_file.
> >>> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
> >> "%sgateway",
> >>> +                 ipfs_full_data_folder) >= sizeof(ipfs_gateway_file))
> {
> >>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path exceeds
> >> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
> >>> +        ret = AVERROR(ENOENT);
> >>> +        goto err;
> >>> +    }
> >>> +
> >>> +    // Get the contents of the gateway file.
> >>> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
> >>> +    if (!gateway_file) {
> >>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
> %s)
> >> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
> >>> +        ret = AVERROR(ENOENT);
> >>> +        goto err;
> >>> +    }
> >>> +
> >>> +    // Read a single line (fgets stops at new line mark).
> >>> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
> >> gateway_file);
> >>> +
> >>> +    // Replace the last char with \0
> >>> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
> >>> +
> >>> +    // Replace first occurence of end of line with \0
> >>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
> >>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
> >>
> >> If the buffer contains both \r and \n and the first \n precedes the
> >> first \r, then the above zeroes both the first \r and the first \n. If
> >> it is enough to only zero the first newline, then this can be simplified
> >> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
> >>
> >
> > It used to be in your suggested simplified approach.
> > It was then suggested to split it because of new line differences
> > on different platforms. This was supposed to catch "everything".
> >
> > I prefer to keep this as-is now.
> >
>
> Is it intended to use what is between the first \n and the first \r
> lateron although this data will be after the '\0' after this? That would
> be very weird.
>

You have to elaborate on that as that sounds really wrong to me.

If I'm correct a file doesn't start with either \r or \n. Unless you have
intentional line breaks.
It ends with those, not starts. I'm just going to ignore \r for now for
brevity.

So if you have:
\n<your gateway>\n

Would be translated to:
\0<your gateway>\n

Then indeed you won't get a gateway because it's intended that your first
line is that gateway.

It must be in this format:
<your gateway>\n

or without a newline
<your gateway>

There is no blank line trimming to fix user screwups.
Users are not intended to touch that file anyhow. It's auto-generated by
IPFS and removed once it shuts down.


> >
> >>> +
> >>> +    // If strlen finds anything longer then 0 characters then we have
> a
> >>> +    // potential gateway url.
> >>> +    if (strlen(c->gateway_buffer) < 1) {
> >>
> >> if (*c->gateway_buffer == '\0')
> >>
> >
> > Is that a style difference or an actual behavior difference?
> >
>
> The former would call strlen to check whether a string is empty (unless
> the compiler optimizes it away). No actual behaviour difference exists.
> But it is nevertheless more than a style difference.
>

Fixed locally.

>
> >
> >>
> >>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
> %s)
> >> appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
> >>> +        ret = AVERROR(EILSEQ);
> >>> +        goto err;
> >>> +    } else {
> >>> +        // We're done, the c->gateway_buffer has something that looks
> >> valid.
> >>> +        ret = 1;
> >>> +        goto err;
> >>> +    }
> >>> +
> >>> +err:
> >>> +    if (gateway_file)
> >>> +        fclose(gateway_file);
> >>> +
> >>> +    return ret;
> >>> +}
> >>> +
> >>> +static int translate_ipfs_to_http(URLContext *h, const char *uri,
> >>> +                                  int flags, AVDictionary **options)
> >>> +{
> >>> +    const char *ipfs_cid;
> >>> +    char *fulluri = NULL;
> >>> +    int ret;
> >>> +    IPFSGatewayContext *c = h->priv_data;
> >>> +
> >>> +    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is
> >> stripped from
> >>> +    // the string leaving just the CID in ipfs_cid.
> >>> +    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
> >>> +    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
> >>> +
> >>> +    // We must have either ipns or ipfs.
> >>> +    if (!is_ipfs && !is_ipns) {
> >>> +        ret = AVERROR(EINVAL);
> >>> +        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
> >>> +        goto err;
> >>> +    }
> >>> +
> >>> +    // If the CID has a length greater then 0 then we assume we have a
> >> proper working one.
> >>> +    // It could still be wrong but in that case the gateway should
> save
> >> us and
> >>> +    // ruturn a 403 error. The http protocol handles this.
> >>> +    if (strlen(ipfs_cid) < 1) {
> >>> +        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
> >>> +        ret = AVERROR(EILSEQ);
> >>> +        goto err;
> >>> +    }
> >>> +
> >>> +    // Populate c->gateway_buffer with whatever is in c->gateway
> >>> +    if (c->gateway != NULL) {
> >>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
> "%s",
> >>> +                     c->gateway) >= sizeof(c->gateway_buffer)) {
> >>> +            av_log(h, AV_LOG_WARNING, "The -gateway parameter is too
> >> long. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
> >>
> >> We typically use SIZE_SPECIFIER instead of z for compatibility with
> >> ancient versions of MSVC.
> >> (I don't know whether there is any supported version of MSVC that
> >> doesn't support z; I don't use MSVC myself.)
> >>
> >
> > Here too I was explicitly suggested to use %zu (when I was using - I
> think
> > - %lu before).
> > So I assume that the ancient MSVC version you're referring to is probably
> > not supported anymore from an ffmpeg compiler requirement point of view?
> >
> > Should i now change all "%zu" to "%"SIZE_SPECIFIER (this does not make it
> > neater nor shorter with the 80 char line limit).
> > Is this change required?
>
> As said: It is for compatibility with ancient versions of MSVC. But I
> don't know whether any of the actually supported versions of MSVC still
> need it.
>
> >
> > If it is, what _exactly_ do i need to change it in? I see a
> > couple different SIZE_SPECIFIER prefixes. I have no clue what to use
> here.
>
> There is only one SIZE_SPECIFIER, namely SIZE_SPECIFIER; the
> PTRDIFF_SPECIFIER (or whatever you see) is obviously not the thing to
> use for size_t.
>
> I see some:
%"SIZE_SPECIFIER"

But also like this:
%8"SIZE_SPECIFIER
%5"SIZE_SPECIFIER

I'm assuming they are for clipping?

I really don't like having to change it to this as it:
- Uglifies the code (and i doubt if it's even needed)
- Directly contradicts earlier feedback
- And due to the above gives me a feeling of wasting time
- Fine to change it but I hope there isn't another person popping up with a
question to change it yet again....


> >
> >
> >>> +            ret = AVERROR(EINVAL);
> >>> +            goto err;
> >>> +        }
> >>> +    } else {
> >>> +        // Populate the IPFS gateway if we have any.
> >>> +        // If not, inform the user how to properly set one.
> >>> +        ret = populate_ipfs_gateway(h);
> >>> +
> >>> +        if (ret < 1) {
> >>> +            // We fallback on dweb.link (managed by Protocol Labs).
> >>> +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "
> >> https://dweb.link");
> >>> +
> >>> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
> >> running. You’re now using the public gateway at dweb.link.\n");
> >>> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
> >> recommended to improve performance and reliability, and not share all
> your
> >> activity with a single IPFS gateway.\n");
> >>> +            av_log(h, AV_LOG_INFO, "There are multiple options to
> >> define this gateway.\n");
> >>> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
> >> param, without a trailing slash: -gateway <url>.\n");
> >>> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
> >> environment variable with the full HTTP URL to the gateway without
> trailing
> >> forward slash.\n");
> >>> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH
> environment
> >> variable and point it to the IPFS data path - this is typically
> ~/.ipfs\n");
> >>
> >> All those AV_LOG_INFO can be combined which has the advantage that the
> >> logs can't be teared apart (which they can now if something else logs at
> >> the same time); furthermore, this would also decrease codesize.
> >>
> >
> > Do you have an example of where that's happening?
> >
>
> It can happen any time you have multiple threads using av_log at the
> same time. Given that your statements are supposed to be full lines, it
> is not that bad if it happens here, but it is nevertheless suboptimal.
>

I meant a multi-line av_log example ;) But yeah, I get how threading could
screw this up.

>
> >>
> >>> +        }
> >>> +    }
> >>> +
> >>> +    // Test if the gateway starts with either http:// or https://
> >>> +    if (av_stristart(c->gateway_buffer, "http://", NULL) == 0
> >>> +        && av_stristart(c->gateway_buffer, "https://", NULL) == 0) {
> >>> +        av_log(h, AV_LOG_WARNING, "The gateway URL didn't start with
> >> http:// or https:// and is therefore invalid.\n");
> >>> +        ret = AVERROR(EILSEQ);
> >>> +        goto err;
> >>> +    }
> >>> +
> >>> +    // Concatenate the url.
> >>> +    // This ends up with something like:
> >> http://localhost:8080/ipfs/Qm.....
> >>> +    // The format of "%s%s%s%s" is the following:
> >>> +    // 1st %s = The gateway.
> >>> +    // 2nd %s = If the gateway didn't end in a slash, add a "/".
> >> Otherwise it's an empty string
> >>> +    // 3rd %s = Either ipns/ or ipfs/.
> >>> +    // 4th %s = The IPFS CID (Qm..., bafy..., ...).
> >>> +    fulluri = av_asprintf("%s%s%s%s",
> >>> +                          c->gateway_buffer,
> >>> +                          (c->gateway_buffer[strlen(c->gateway_buffer)
> >> - 1] == '/') ? "" : "/",
> >>> +                          (is_ipns) ? "ipns/" : "ipfs/",
> >>> +                          ipfs_cid);
> >>
> >> Missing allocation check.
> >>
> >
> > Ah
> > Fixed it locally.
> >
> >
> >>
> >>> +
> >>> +    // Pass the URL back to FFMpeg's protocol handler.
> >>> +    if ((ret = ffurl_open_whitelist(&c->inner, fulluri, flags,
> >>> +                                    &h->interrupt_callback, options,
> >>> +                                    h->protocol_whitelist,
> >>> +                                    h->protocol_blacklist, h))
> >>> +        < 0) {
> >>
> >> Weird formatting; why don't you just use
> >> ret = ffurl_open_whitelist(...);
> >> if (ret < 0) {
> >>
> >
> > Fixed.
> >
> >
> >>
> >>> +        av_log(h, AV_LOG_WARNING, "Unable to open resource: %s\n",
> >> fulluri);
> >>> +        goto err;
> >>> +    }
> >>> +
> >>> +err:
> >>> +    av_free(fulluri);
> >>> +    return ret;
> >>> +}
> >>> +
> >>> +static int ipfs_read(URLContext *h, unsigned char *buf, int size)
> >>> +{
> >>> +    IPFSGatewayContext *c = h->priv_data;
> >>> +    return ffurl_read(c->inner, buf, size);
> >>> +}
> >>> +
> >>> +static int64_t ipfs_seek(URLContext *h, int64_t pos, int whence)
> >>> +{
> >>> +    IPFSGatewayContext *c = h->priv_data;
> >>> +    return ffurl_seek(c->inner, pos, whence);
> >>> +}
> >>> +
> >>> +static int ipfs_close(URLContext *h)
> >>> +{
> >>> +    IPFSGatewayContext *c = h->priv_data;
> >>> +    return ffurl_closep(&c->inner);
> >>> +}
> >>> +
> >>> +#define OFFSET(x) offsetof(IPFSGatewayContext, x)
> >>> +
> >>> +static const AVOption options[] = {
> >>> +    {"gateway", "The gateway to ask for IPFS data.", OFFSET(gateway),
> >> AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM},
> >>> +    {NULL},
> >>> +};
> >>> +
> >>> +static const AVClass ipfs_context_class = {
> >>> +    .class_name   = "IPFS",
> >>> +    .item_name    = av_default_item_name,
> >>> +    .option       = options,
> >>> +    .version      = LIBAVUTIL_VERSION_INT,
> >>> +};
> >>> +
> >>> +const URLProtocol ff_ipfs_protocol = {
> >>> +    .name             = "ipfs",
> >>> +    .url_open2        = translate_ipfs_to_http,
> >>> +    .url_read         = ipfs_read,
> >>> +    .url_seek         = ipfs_seek,
> >>> +    .url_close        = ipfs_close,
> >>> +    .priv_data_size   = sizeof(IPFSGatewayContext),
> >>> +    .priv_data_class  = &ipfs_context_class,
> >>> +};
> >>> +
> >>> +const URLProtocol ff_ipns_protocol = {
> >>> +    .name             = "ipns",
> >>> +    .url_open2        = translate_ipfs_to_http,
> >>> +    .url_read         = ipfs_read,
> >>> +    .url_seek         = ipfs_seek,
> >>> +    .url_close        = ipfs_close,
> >>> +    .priv_data_size   = sizeof(IPFSGatewayContext),
> >>> +    .priv_data_class  = &ipfs_context_class,
> >>> +};
> >>> diff --git a/libavformat/protocols.c b/libavformat/protocols.c
> >>> index d07563cd0c..6ee62a598a 100644
> >>> --- a/libavformat/protocols.c
> >>> +++ b/libavformat/protocols.c
> >>> @@ -71,6 +71,8 @@ extern const URLProtocol ff_libsrt_protocol;
> >>>  extern const URLProtocol ff_libssh_protocol;
> >>>  extern const URLProtocol ff_libsmbclient_protocol;
> >>>  extern const URLProtocol ff_libzmq_protocol;
> >>> +extern const URLProtocol ff_ipfs_protocol;
> >>> +extern const URLProtocol ff_ipns_protocol;
> >>>
> >>>  #include "libavformat/protocol_list.c"
> >>>
> >>
> >>
> > Thank you very much for your review!
> > Please do hit me back with a reply on the questions I have still open.
> >
> > I'm honestly quite done with patching this over and over again (it's open
> > for months now) so I'd like to put the pace in these fixes and send an
> > updated version today.
> >
>
>
Mark Gaiser March 31, 2022, 4:01 p.m. UTC | #5
On Wed, Mar 30, 2022 at 5:16 PM Mark Gaiser <markg85@gmail.com> wrote:

>
>
> On Wed, Mar 30, 2022 at 3:57 PM Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com> wrote:
>
>> Mark Gaiser:
>> > On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
>> > andreas.rheinhardt@outlook.com> wrote:
>> >
>> >> Mark Gaiser:
>> >>> This patch adds support for:
>> >>> - ffplay ipfs://<cid>
>> >>> - ffplay ipns://<cid>
>> >>>
>> >>> IPFS data can be played from so called "ipfs gateways".
>> >>> A gateway is essentially a webserver that gives access to the
>> >>> distributed IPFS network.
>> >>>
>> >>> This protocol support (ipfs and ipns) therefore translates
>> >>> ipfs:// and ipns:// to a http:// url. This resulting url is
>> >>> then handled by the http protocol. It could also be https
>> >>> depending on the gateway provided.
>> >>>
>> >>> To use this protocol, a gateway must be provided.
>> >>> If you do nothing it will try to find it in your
>> >>> $HOME/.ipfs/gateway file. The ways to set it manually are:
>> >>> 1. Define a -gateway <url> to the gateway.
>> >>> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
>> >>> 3. Define $IPFS_PATH and point it to the IPFS data path.
>> >>> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
>> >>>
>> >>> Signed-off-by: Mark Gaiser <markg85@gmail.com>
>> >>> ---
>> >>>  configure                 |   2 +
>> >>>  doc/protocols.texi        |  30 ++++
>> >>>  libavformat/Makefile      |   2 +
>> >>>  libavformat/ipfsgateway.c | 309
>> ++++++++++++++++++++++++++++++++++++++
>> >>>  libavformat/protocols.c   |   2 +
>> >>>  5 files changed, 345 insertions(+)
>> >>>  create mode 100644 libavformat/ipfsgateway.c
>> >>>
>> >>> diff --git a/configure b/configure
>> >>> index e4d36aa639..55af90957a 100755
>> >>> --- a/configure
>> >>> +++ b/configure
>> >>> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
>> >>>  udplite_protocol_select="network"
>> >>>  unix_protocol_deps="sys_un_h"
>> >>>  unix_protocol_select="network"
>> >>> +ipfs_protocol_select="https_protocol"
>> >>> +ipns_protocol_select="https_protocol"
>> >>>
>> >>>  # external library protocols
>> >>>  libamqp_protocol_deps="librabbitmq"
>> >>> diff --git a/doc/protocols.texi b/doc/protocols.texi
>> >>> index d207df0b52..7c9c0a4808 100644
>> >>> --- a/doc/protocols.texi
>> >>> +++ b/doc/protocols.texi
>> >>> @@ -2025,5 +2025,35 @@ decoding errors.
>> >>>
>> >>>  @end table
>> >>>
>> >>> +@section ipfs
>> >>> +
>> >>> +InterPlanetary File System (IPFS) protocol support. One can access
>> >> files stored
>> >>> +on the IPFS network through so called gateways. Those are http(s)
>> >> endpoints.
>> >>> +This protocol wraps the IPFS native protocols (ipfs:// and ipns://)
>> to
>> >> be send
>> >>> +to such a gateway. Users can (and should) host their own node which
>> >> means this
>> >>> +protocol will use your local machine gateway to access files on the
>> >> IPFS network.
>> >>> +
>> >>> +If a user doesn't have a node of their own then the public gateway
>> >> dweb.link is
>> >>> +used by default.
>> >>> +
>> >>> +You can use this protocol in 2 ways. Using IPFS:
>> >>> +@example
>> >>> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>> >>> +@end example
>> >>> +
>> >>> +Or the IPNS protocol (IPNS is mutable IPFS):
>> >>> +@example
>> >>> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>> >>> +@end example
>> >>> +
>> >>> +You can also change the gateway to be used:
>> >>> +
>> >>> +@table @option
>> >>> +
>> >>> +@item gateway
>> >>> +Defines the gateway to use. When nothing is provided the protocol
>> will
>> >> first try
>> >>> +your local gateway. If that fails dweb.link will be used.
>> >>> +
>> >>> +@end table
>> >>>
>> >>>  @c man end PROTOCOLS
>> >>> diff --git a/libavformat/Makefile b/libavformat/Makefile
>> >>> index d7182d6bd8..e3233fd7ac 100644
>> >>> --- a/libavformat/Makefile
>> >>> +++ b/libavformat/Makefile
>> >>> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
>> >> srtpproto.o srtp.o
>> >>>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
>> >>>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
>> >>>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
>> >>> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
>> >>> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
>> >>>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
>> >>>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
>> >>>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
>> >>> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
>> >>> new file mode 100644
>> >>> index 0000000000..1a039589c0
>> >>> --- /dev/null
>> >>> +++ b/libavformat/ipfsgateway.c
>> >>> @@ -0,0 +1,309 @@
>> >>> +/*
>> >>> + * IPFS and IPNS protocol support through IPFS Gateway.
>> >>> + * Copyright (c) 2022 Mark Gaiser
>> >>> + *
>> >>> + * This file is part of FFmpeg.
>> >>> + *
>> >>> + * FFmpeg is free software; you can redistribute it and/or
>> >>> + * modify it under the terms of the GNU Lesser General Public
>> >>> + * License as published by the Free Software Foundation; either
>> >>> + * version 2.1 of the License, or (at your option) any later version.
>> >>> + *
>> >>> + * FFmpeg is distributed in the hope that it will be useful,
>> >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> >>> + * Lesser General Public License for more details.
>> >>> + *
>> >>> + * You should have received a copy of the GNU Lesser General Public
>> >>> + * License along with FFmpeg; if not, write to the Free Software
>> >>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> >> 02110-1301 USA
>> >>> + */
>> >>> +
>> >>> +#include "avformat.h"
>> >>> +#include "libavutil/avassert.h"
>> >>
>> >> Unused.
>> >>
>> >>> +#include "libavutil/avstring.h"
>> >>> +#include "libavutil/internal.h"
>> >>> +#include "libavutil/opt.h"
>> >>> +#include "libavutil/tree.h"
>> >>
>> >> ?
>> >>
>> >
>> > This whole include part can be cleaned up much more.
>> > Just having these:
>> > #include "libavutil/avstring.h"
>> > #include "libavutil/opt.h"
>> > #include "url.h"
>> > #include <sys/stat.h>
>> >
>> > is enough.
>> >
>> >
>> >>> +#include <fcntl.h>
>> >>> +#if HAVE_IO_H
>> >>> +#include <io.h>
>> >>> +#endif
>> >>> +#if HAVE_UNISTD_H
>> >>> +#include <unistd.h>
>> >>> +#endif
>> >>> +#include "os_support.h"
>> >>> +#include "url.h"
>> >>> +#include <stdlib.h>
>> >>> +#include <sys/stat.h>
>> >>> +
>> >>> +typedef struct IPFSGatewayContext {
>> >>> +    AVClass *class;
>> >>> +    URLContext *inner;
>> >>> +    // Is filled by the -gateway argument and not changed after.
>> >>> +    char *gateway;
>> >>> +    // If the above gateway is non null, it will be copied into this
>> >> buffer.
>> >>> +    // Else this buffer will contain the auto detected gateway.
>> >>> +    // In either case, the gateway to use will be in this buffer.
>> >>> +    char gateway_buffer[PATH_MAX];
>> >>> +} IPFSGatewayContext;
>> >>> +
>> >>> +// A best-effort way to find the IPFS gateway.
>> >>> +// Only the most appropiate gateway is set. It's not actually
>> requested
>> >>> +// (http call) to prevent a potential slowdown in startup. A
>> potential
>> >> timeout
>> >>> +// is handled by the HTTP protocol.
>> >>> +static int populate_ipfs_gateway(URLContext *h)
>> >>> +{
>> >>> +    IPFSGatewayContext *c = h->priv_data;
>> >>> +    char ipfs_full_data_folder[PATH_MAX];
>> >>> +    char ipfs_gateway_file[PATH_MAX];
>> >>> +    struct stat st;
>> >>> +    int stat_ret = 0;
>> >>> +    int ret = AVERROR(EINVAL);
>> >>> +    FILE *gateway_file = NULL;
>> >>> +
>> >>> +    // Test $IPFS_GATEWAY.
>> >>> +    if (getenv("IPFS_GATEWAY") != NULL) {
>> >>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
>> "%s",
>> >>> +                     getenv("IPFS_GATEWAY")) >=
>> >> sizeof(c->gateway_buffer)) {
>> >>> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment
>> >> variable exceeds the maximum length. We allow a max of %zu
>> characters\n",
>> >> sizeof(c->gateway_buffer));
>> >>> +            ret = AVERROR(EINVAL);
>> >>> +            goto err;
>> >>> +        }
>> >>> +
>> >>> +        ret = 1;
>> >>> +        goto err;
>> >>> +    } else
>> >>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
>> >>> +
>> >>> +    // We need to know the IPFS folder to - eventually - read the
>> >> contents of
>> >>> +    // the "gateway" file which would tell us the gateway to use.
>> >>> +    if (getenv("IPFS_PATH") == NULL) {
>> >>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
>> >>> +
>> >>> +        // Try via the home folder.
>> >>> +        if (getenv("HOME") == NULL) {
>> >>> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be
>> empty.\n");
>> >>> +            ret = AVERROR(EINVAL);
>> >>> +            goto err;
>> >>> +        }
>> >>> +
>> >>> +        // Verify the composed path fits.
>> >>> +        if (snprintf(ipfs_full_data_folder,
>> >> sizeof(ipfs_full_data_folder),
>> >>> +                     "%s/.ipfs/", getenv("HOME")) >=
>> >> sizeof(ipfs_full_data_folder)) {
>> >>> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds the
>> >> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
>> >>> +            ret = AVERROR(EINVAL);
>> >>> +            goto err;
>> >>> +        }
>> >>> +
>> >>> +        // Stat the folder.
>> >>> +        // It should exist in a default IPFS setup when run as local
>> >> user.
>> >>> +#ifndef _WIN32
>> >>> +        stat_ret = stat(ipfs_full_data_folder, &st);
>> >>> +#else
>> >>> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
>> >>> +#endif
>> >>> +        if (stat_ret < 0) {
>> >>> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
>> >> tried:\n");
>> >>> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was
>> empty.\n");
>> >>> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s)
>> which
>> >> doesn't exist.\n", ipfs_full_data_folder);
>> >>> +            ret = AVERROR(ENOENT);
>> >>> +            goto err;
>> >>> +        }
>> >>> +    } else {
>> >>> +        if (snprintf(ipfs_full_data_folder,
>> >> sizeof(ipfs_full_data_folder), "%s",
>> >>> +                 getenv("IPFS_PATH")) >=
>> sizeof(ipfs_full_data_folder))
>> >> {
>> >>> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
>> >> variable exceeds the maximum length. We allow a max of %zu
>> characters\n",
>> >> sizeof(c->gateway_buffer));
>> >>> +            ret = AVERROR(EINVAL);
>> >>> +            goto err;
>> >>> +        }
>> >>> +
>> >>> +    }
>> >>> +
>> >>> +    // Copy the fully composed gateway path into ipfs_gateway_file.
>> >>> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
>> >> "%sgateway",
>> >>> +                 ipfs_full_data_folder) >=
>> sizeof(ipfs_gateway_file)) {
>> >>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path exceeds
>> >> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
>> >>> +        ret = AVERROR(ENOENT);
>> >>> +        goto err;
>> >>> +    }
>> >>> +
>> >>> +    // Get the contents of the gateway file.
>> >>> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
>> >>> +    if (!gateway_file) {
>> >>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
>> %s)
>> >> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
>> >>> +        ret = AVERROR(ENOENT);
>> >>> +        goto err;
>> >>> +    }
>> >>> +
>> >>> +    // Read a single line (fgets stops at new line mark).
>> >>> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
>> >> gateway_file);
>> >>> +
>> >>> +    // Replace the last char with \0
>> >>> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
>> >>> +
>> >>> +    // Replace first occurence of end of line with \0
>> >>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
>> >>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
>> >>
>> >> If the buffer contains both \r and \n and the first \n precedes the
>> >> first \r, then the above zeroes both the first \r and the first \n. If
>> >> it is enough to only zero the first newline, then this can be
>> simplified
>> >> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
>> >>
>> >
>> > It used to be in your suggested simplified approach.
>> > It was then suggested to split it because of new line differences
>> > on different platforms. This was supposed to catch "everything".
>> >
>> > I prefer to keep this as-is now.
>> >
>>
>> Is it intended to use what is between the first \n and the first \r
>> lateron although this data will be after the '\0' after this? That would
>> be very weird.
>>
>
> You have to elaborate on that as that sounds really wrong to me.
>
> If I'm correct a file doesn't start with either \r or \n. Unless you have
> intentional line breaks.
> It ends with those, not starts. I'm just going to ignore \r for now for
> brevity.
>
> So if you have:
> \n<your gateway>\n
>
> Would be translated to:
> \0<your gateway>\n
>
> Then indeed you won't get a gateway because it's intended that your first
> line is that gateway.
>
> It must be in this format:
> <your gateway>\n
>
> or without a newline
> <your gateway>
>
> There is no blank line trimming to fix user screwups.
> Users are not intended to touch that file anyhow. It's auto-generated by
> IPFS and removed once it shuts down.
>
>
>> >
>> >>> +
>> >>> +    // If strlen finds anything longer then 0 characters then we
>> have a
>> >>> +    // potential gateway url.
>> >>> +    if (strlen(c->gateway_buffer) < 1) {
>> >>
>> >> if (*c->gateway_buffer == '\0')
>> >>
>> >
>> > Is that a style difference or an actual behavior difference?
>> >
>>
>> The former would call strlen to check whether a string is empty (unless
>> the compiler optimizes it away). No actual behaviour difference exists.
>> But it is nevertheless more than a style difference.
>>
>
> Fixed locally.
>
>>
>> >
>> >>
>> >>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
>> %s)
>> >> appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
>> >>> +        ret = AVERROR(EILSEQ);
>> >>> +        goto err;
>> >>> +    } else {
>> >>> +        // We're done, the c->gateway_buffer has something that looks
>> >> valid.
>> >>> +        ret = 1;
>> >>> +        goto err;
>> >>> +    }
>> >>> +
>> >>> +err:
>> >>> +    if (gateway_file)
>> >>> +        fclose(gateway_file);
>> >>> +
>> >>> +    return ret;
>> >>> +}
>> >>> +
>> >>> +static int translate_ipfs_to_http(URLContext *h, const char *uri,
>> >>> +                                  int flags, AVDictionary **options)
>> >>> +{
>> >>> +    const char *ipfs_cid;
>> >>> +    char *fulluri = NULL;
>> >>> +    int ret;
>> >>> +    IPFSGatewayContext *c = h->priv_data;
>> >>> +
>> >>> +    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is
>> >> stripped from
>> >>> +    // the string leaving just the CID in ipfs_cid.
>> >>> +    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
>> >>> +    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
>> >>> +
>> >>> +    // We must have either ipns or ipfs.
>> >>> +    if (!is_ipfs && !is_ipns) {
>> >>> +        ret = AVERROR(EINVAL);
>> >>> +        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
>> >>> +        goto err;
>> >>> +    }
>> >>> +
>> >>> +    // If the CID has a length greater then 0 then we assume we have
>> a
>> >> proper working one.
>> >>> +    // It could still be wrong but in that case the gateway should
>> save
>> >> us and
>> >>> +    // ruturn a 403 error. The http protocol handles this.
>> >>> +    if (strlen(ipfs_cid) < 1) {
>> >>> +        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
>> >>> +        ret = AVERROR(EILSEQ);
>> >>> +        goto err;
>> >>> +    }
>> >>> +
>> >>> +    // Populate c->gateway_buffer with whatever is in c->gateway
>> >>> +    if (c->gateway != NULL) {
>> >>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
>> "%s",
>> >>> +                     c->gateway) >= sizeof(c->gateway_buffer)) {
>> >>> +            av_log(h, AV_LOG_WARNING, "The -gateway parameter is too
>> >> long. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
>> >>
>> >> We typically use SIZE_SPECIFIER instead of z for compatibility with
>> >> ancient versions of MSVC.
>> >> (I don't know whether there is any supported version of MSVC that
>> >> doesn't support z; I don't use MSVC myself.)
>> >>
>> >
>> > Here too I was explicitly suggested to use %zu (when I was using - I
>> think
>> > - %lu before).
>> > So I assume that the ancient MSVC version you're referring to is
>> probably
>> > not supported anymore from an ffmpeg compiler requirement point of view?
>> >
>> > Should i now change all "%zu" to "%"SIZE_SPECIFIER (this does not make
>> it
>> > neater nor shorter with the 80 char line limit).
>> > Is this change required?
>>
>> As said: It is for compatibility with ancient versions of MSVC. But I
>> don't know whether any of the actually supported versions of MSVC still
>> need it.
>>
>> >
>> > If it is, what _exactly_ do i need to change it in? I see a
>> > couple different SIZE_SPECIFIER prefixes. I have no clue what to use
>> here.
>>
>> There is only one SIZE_SPECIFIER, namely SIZE_SPECIFIER; the
>> PTRDIFF_SPECIFIER (or whatever you see) is obviously not the thing to
>> use for size_t.
>>
>> I see some:
> %"SIZE_SPECIFIER"
>
> But also like this:
> %8"SIZE_SPECIFIER
> %5"SIZE_SPECIFIER
>
> I'm assuming they are for clipping?
>
> I really don't like having to change it to this as it:
> - Uglifies the code (and i doubt if it's even needed)
> - Directly contradicts earlier feedback
> - And due to the above gives me a feeling of wasting time
> - Fine to change it but I hope there isn't another person popping up with
> a question to change it yet again....
>
>
>> >
>> >
>> >>> +            ret = AVERROR(EINVAL);
>> >>> +            goto err;
>> >>> +        }
>> >>> +    } else {
>> >>> +        // Populate the IPFS gateway if we have any.
>> >>> +        // If not, inform the user how to properly set one.
>> >>> +        ret = populate_ipfs_gateway(h);
>> >>> +
>> >>> +        if (ret < 1) {
>> >>> +            // We fallback on dweb.link (managed by Protocol Labs).
>> >>> +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "
>> >> https://dweb.link");
>> >>> +
>> >>> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
>> >> running. You’re now using the public gateway at dweb.link.\n");
>> >>> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
>> >> recommended to improve performance and reliability, and not share all
>> your
>> >> activity with a single IPFS gateway.\n");
>> >>> +            av_log(h, AV_LOG_INFO, "There are multiple options to
>> >> define this gateway.\n");
>> >>> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
>> >> param, without a trailing slash: -gateway <url>.\n");
>> >>> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
>> >> environment variable with the full HTTP URL to the gateway without
>> trailing
>> >> forward slash.\n");
>> >>> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH
>> environment
>> >> variable and point it to the IPFS data path - this is typically
>> ~/.ipfs\n");
>> >>
>> >> All those AV_LOG_INFO can be combined which has the advantage that the
>> >> logs can't be teared apart (which they can now if something else logs
>> at
>> >> the same time); furthermore, this would also decrease codesize.
>> >>
>> >
>> > Do you have an example of where that's happening?
>> >
>>
>> It can happen any time you have multiple threads using av_log at the
>> same time. Given that your statements are supposed to be full lines, it
>> is not that bad if it happens here, but it is nevertheless suboptimal.
>>
>
> I meant a multi-line av_log example ;) But yeah, I get how threading could
> screw this up.
>
>>
>> >>
>> >>> +        }
>> >>> +    }
>> >>> +
>> >>> +    // Test if the gateway starts with either http:// or https://
>> >>> +    if (av_stristart(c->gateway_buffer, "http://", NULL) == 0
>> >>> +        && av_stristart(c->gateway_buffer, "https://", NULL) == 0) {
>> >>> +        av_log(h, AV_LOG_WARNING, "The gateway URL didn't start with
>> >> http:// or https:// and is therefore invalid.\n");
>> >>> +        ret = AVERROR(EILSEQ);
>> >>> +        goto err;
>> >>> +    }
>> >>> +
>> >>> +    // Concatenate the url.
>> >>> +    // This ends up with something like:
>> >> http://localhost:8080/ipfs/Qm.....
>> >>> +    // The format of "%s%s%s%s" is the following:
>> >>> +    // 1st %s = The gateway.
>> >>> +    // 2nd %s = If the gateway didn't end in a slash, add a "/".
>> >> Otherwise it's an empty string
>> >>> +    // 3rd %s = Either ipns/ or ipfs/.
>> >>> +    // 4th %s = The IPFS CID (Qm..., bafy..., ...).
>> >>> +    fulluri = av_asprintf("%s%s%s%s",
>> >>> +                          c->gateway_buffer,
>> >>> +
>> (c->gateway_buffer[strlen(c->gateway_buffer)
>> >> - 1] == '/') ? "" : "/",
>> >>> +                          (is_ipns) ? "ipns/" : "ipfs/",
>> >>> +                          ipfs_cid);
>> >>
>> >> Missing allocation check.
>> >>
>> >
>> > Ah
>> > Fixed it locally.
>> >
>> >
>> >>
>> >>> +
>> >>> +    // Pass the URL back to FFMpeg's protocol handler.
>> >>> +    if ((ret = ffurl_open_whitelist(&c->inner, fulluri, flags,
>> >>> +                                    &h->interrupt_callback, options,
>> >>> +                                    h->protocol_whitelist,
>> >>> +                                    h->protocol_blacklist, h))
>> >>> +        < 0) {
>> >>
>> >> Weird formatting; why don't you just use
>> >> ret = ffurl_open_whitelist(...);
>> >> if (ret < 0) {
>> >>
>> >
>> > Fixed.
>> >
>> >
>> >>
>> >>> +        av_log(h, AV_LOG_WARNING, "Unable to open resource: %s\n",
>> >> fulluri);
>> >>> +        goto err;
>> >>> +    }
>> >>> +
>> >>> +err:
>> >>> +    av_free(fulluri);
>> >>> +    return ret;
>> >>> +}
>> >>> +
>> >>> +static int ipfs_read(URLContext *h, unsigned char *buf, int size)
>> >>> +{
>> >>> +    IPFSGatewayContext *c = h->priv_data;
>> >>> +    return ffurl_read(c->inner, buf, size);
>> >>> +}
>> >>> +
>> >>> +static int64_t ipfs_seek(URLContext *h, int64_t pos, int whence)
>> >>> +{
>> >>> +    IPFSGatewayContext *c = h->priv_data;
>> >>> +    return ffurl_seek(c->inner, pos, whence);
>> >>> +}
>> >>> +
>> >>> +static int ipfs_close(URLContext *h)
>> >>> +{
>> >>> +    IPFSGatewayContext *c = h->priv_data;
>> >>> +    return ffurl_closep(&c->inner);
>> >>> +}
>> >>> +
>> >>> +#define OFFSET(x) offsetof(IPFSGatewayContext, x)
>> >>> +
>> >>> +static const AVOption options[] = {
>> >>> +    {"gateway", "The gateway to ask for IPFS data.", OFFSET(gateway),
>> >> AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM},
>> >>> +    {NULL},
>> >>> +};
>> >>> +
>> >>> +static const AVClass ipfs_context_class = {
>> >>> +    .class_name   = "IPFS",
>> >>> +    .item_name    = av_default_item_name,
>> >>> +    .option       = options,
>> >>> +    .version      = LIBAVUTIL_VERSION_INT,
>> >>> +};
>> >>> +
>> >>> +const URLProtocol ff_ipfs_protocol = {
>> >>> +    .name             = "ipfs",
>> >>> +    .url_open2        = translate_ipfs_to_http,
>> >>> +    .url_read         = ipfs_read,
>> >>> +    .url_seek         = ipfs_seek,
>> >>> +    .url_close        = ipfs_close,
>> >>> +    .priv_data_size   = sizeof(IPFSGatewayContext),
>> >>> +    .priv_data_class  = &ipfs_context_class,
>> >>> +};
>> >>> +
>> >>> +const URLProtocol ff_ipns_protocol = {
>> >>> +    .name             = "ipns",
>> >>> +    .url_open2        = translate_ipfs_to_http,
>> >>> +    .url_read         = ipfs_read,
>> >>> +    .url_seek         = ipfs_seek,
>> >>> +    .url_close        = ipfs_close,
>> >>> +    .priv_data_size   = sizeof(IPFSGatewayContext),
>> >>> +    .priv_data_class  = &ipfs_context_class,
>> >>> +};
>> >>> diff --git a/libavformat/protocols.c b/libavformat/protocols.c
>> >>> index d07563cd0c..6ee62a598a 100644
>> >>> --- a/libavformat/protocols.c
>> >>> +++ b/libavformat/protocols.c
>> >>> @@ -71,6 +71,8 @@ extern const URLProtocol ff_libsrt_protocol;
>> >>>  extern const URLProtocol ff_libssh_protocol;
>> >>>  extern const URLProtocol ff_libsmbclient_protocol;
>> >>>  extern const URLProtocol ff_libzmq_protocol;
>> >>> +extern const URLProtocol ff_ipfs_protocol;
>> >>> +extern const URLProtocol ff_ipns_protocol;
>> >>>
>> >>>  #include "libavformat/protocol_list.c"
>> >>>
>> >>
>> >>
>> > Thank you very much for your review!
>> > Please do hit me back with a reply on the questions I have still open.
>> >
>> > I'm honestly quite done with patching this over and over again (it's
>> open
>> > for months now) so I'd like to put the pace in these fixes and send an
>> > updated version today.
>> >
>>
>>
Ping Andreas, i'm waiting for your response on the questions above before i
send an updated patch.
Also ping for Tomas with regards to the same questions (%zu vs
SIZE_SPECIFIER and the merging of \r\n which i had split based on your
feedback)

Let's keep the pace :)
Andreas Rheinhardt March 31, 2022, 9:44 p.m. UTC | #6
Mark Gaiser:
> On Wed, Mar 30, 2022 at 3:57 PM Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com> wrote:
> 
>> Mark Gaiser:
>>> On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
>>> andreas.rheinhardt@outlook.com> wrote:
>>>
>>>> Mark Gaiser:
>>>>> This patch adds support for:
>>>>> - ffplay ipfs://<cid>
>>>>> - ffplay ipns://<cid>
>>>>>
>>>>> IPFS data can be played from so called "ipfs gateways".
>>>>> A gateway is essentially a webserver that gives access to the
>>>>> distributed IPFS network.
>>>>>
>>>>> This protocol support (ipfs and ipns) therefore translates
>>>>> ipfs:// and ipns:// to a http:// url. This resulting url is
>>>>> then handled by the http protocol. It could also be https
>>>>> depending on the gateway provided.
>>>>>
>>>>> To use this protocol, a gateway must be provided.
>>>>> If you do nothing it will try to find it in your
>>>>> $HOME/.ipfs/gateway file. The ways to set it manually are:
>>>>> 1. Define a -gateway <url> to the gateway.
>>>>> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
>>>>> 3. Define $IPFS_PATH and point it to the IPFS data path.
>>>>> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
>>>>>
>>>>> Signed-off-by: Mark Gaiser <markg85@gmail.com>
>>>>> ---
>>>>>  configure                 |   2 +
>>>>>  doc/protocols.texi        |  30 ++++
>>>>>  libavformat/Makefile      |   2 +
>>>>>  libavformat/ipfsgateway.c | 309 ++++++++++++++++++++++++++++++++++++++
>>>>>  libavformat/protocols.c   |   2 +
>>>>>  5 files changed, 345 insertions(+)
>>>>>  create mode 100644 libavformat/ipfsgateway.c
>>>>>
>>>>> diff --git a/configure b/configure
>>>>> index e4d36aa639..55af90957a 100755
>>>>> --- a/configure
>>>>> +++ b/configure
>>>>> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
>>>>>  udplite_protocol_select="network"
>>>>>  unix_protocol_deps="sys_un_h"
>>>>>  unix_protocol_select="network"
>>>>> +ipfs_protocol_select="https_protocol"
>>>>> +ipns_protocol_select="https_protocol"
>>>>>
>>>>>  # external library protocols
>>>>>  libamqp_protocol_deps="librabbitmq"
>>>>> diff --git a/doc/protocols.texi b/doc/protocols.texi
>>>>> index d207df0b52..7c9c0a4808 100644
>>>>> --- a/doc/protocols.texi
>>>>> +++ b/doc/protocols.texi
>>>>> @@ -2025,5 +2025,35 @@ decoding errors.
>>>>>
>>>>>  @end table
>>>>>
>>>>> +@section ipfs
>>>>> +
>>>>> +InterPlanetary File System (IPFS) protocol support. One can access
>>>> files stored
>>>>> +on the IPFS network through so called gateways. Those are http(s)
>>>> endpoints.
>>>>> +This protocol wraps the IPFS native protocols (ipfs:// and ipns://) to
>>>> be send
>>>>> +to such a gateway. Users can (and should) host their own node which
>>>> means this
>>>>> +protocol will use your local machine gateway to access files on the
>>>> IPFS network.
>>>>> +
>>>>> +If a user doesn't have a node of their own then the public gateway
>>>> dweb.link is
>>>>> +used by default.
>>>>> +
>>>>> +You can use this protocol in 2 ways. Using IPFS:
>>>>> +@example
>>>>> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>>>>> +@end example
>>>>> +
>>>>> +Or the IPNS protocol (IPNS is mutable IPFS):
>>>>> +@example
>>>>> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>>>>> +@end example
>>>>> +
>>>>> +You can also change the gateway to be used:
>>>>> +
>>>>> +@table @option
>>>>> +
>>>>> +@item gateway
>>>>> +Defines the gateway to use. When nothing is provided the protocol will
>>>> first try
>>>>> +your local gateway. If that fails dweb.link will be used.
>>>>> +
>>>>> +@end table
>>>>>
>>>>>  @c man end PROTOCOLS
>>>>> diff --git a/libavformat/Makefile b/libavformat/Makefile
>>>>> index d7182d6bd8..e3233fd7ac 100644
>>>>> --- a/libavformat/Makefile
>>>>> +++ b/libavformat/Makefile
>>>>> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
>>>> srtpproto.o srtp.o
>>>>>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
>>>>>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
>>>>>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
>>>>> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
>>>>> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
>>>>>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
>>>>>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
>>>>>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
>>>>> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
>>>>> new file mode 100644
>>>>> index 0000000000..1a039589c0
>>>>> --- /dev/null
>>>>> +++ b/libavformat/ipfsgateway.c
>>>>> @@ -0,0 +1,309 @@
>>>>> +/*
>>>>> + * IPFS and IPNS protocol support through IPFS Gateway.
>>>>> + * Copyright (c) 2022 Mark Gaiser
>>>>> + *
>>>>> + * This file is part of FFmpeg.
>>>>> + *
>>>>> + * FFmpeg is free software; you can redistribute it and/or
>>>>> + * modify it under the terms of the GNU Lesser General Public
>>>>> + * License as published by the Free Software Foundation; either
>>>>> + * version 2.1 of the License, or (at your option) any later version.
>>>>> + *
>>>>> + * FFmpeg is distributed in the hope that it will be useful,
>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>>> + * Lesser General Public License for more details.
>>>>> + *
>>>>> + * You should have received a copy of the GNU Lesser General Public
>>>>> + * License along with FFmpeg; if not, write to the Free Software
>>>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>>>> 02110-1301 USA
>>>>> + */
>>>>> +
>>>>> +#include "avformat.h"
>>>>> +#include "libavutil/avassert.h"
>>>>
>>>> Unused.
>>>>
>>>>> +#include "libavutil/avstring.h"
>>>>> +#include "libavutil/internal.h"
>>>>> +#include "libavutil/opt.h"
>>>>> +#include "libavutil/tree.h"
>>>>
>>>> ?
>>>>
>>>
>>> This whole include part can be cleaned up much more.
>>> Just having these:
>>> #include "libavutil/avstring.h"
>>> #include "libavutil/opt.h"
>>> #include "url.h"
>>> #include <sys/stat.h>
>>>
>>> is enough.
>>>
>>>
>>>>> +#include <fcntl.h>
>>>>> +#if HAVE_IO_H
>>>>> +#include <io.h>
>>>>> +#endif
>>>>> +#if HAVE_UNISTD_H
>>>>> +#include <unistd.h>
>>>>> +#endif
>>>>> +#include "os_support.h"
>>>>> +#include "url.h"
>>>>> +#include <stdlib.h>
>>>>> +#include <sys/stat.h>
>>>>> +
>>>>> +typedef struct IPFSGatewayContext {
>>>>> +    AVClass *class;
>>>>> +    URLContext *inner;
>>>>> +    // Is filled by the -gateway argument and not changed after.
>>>>> +    char *gateway;
>>>>> +    // If the above gateway is non null, it will be copied into this
>>>> buffer.
>>>>> +    // Else this buffer will contain the auto detected gateway.
>>>>> +    // In either case, the gateway to use will be in this buffer.
>>>>> +    char gateway_buffer[PATH_MAX];
>>>>> +} IPFSGatewayContext;
>>>>> +
>>>>> +// A best-effort way to find the IPFS gateway.
>>>>> +// Only the most appropiate gateway is set. It's not actually
>> requested
>>>>> +// (http call) to prevent a potential slowdown in startup. A potential
>>>> timeout
>>>>> +// is handled by the HTTP protocol.
>>>>> +static int populate_ipfs_gateway(URLContext *h)
>>>>> +{
>>>>> +    IPFSGatewayContext *c = h->priv_data;
>>>>> +    char ipfs_full_data_folder[PATH_MAX];
>>>>> +    char ipfs_gateway_file[PATH_MAX];
>>>>> +    struct stat st;
>>>>> +    int stat_ret = 0;
>>>>> +    int ret = AVERROR(EINVAL);
>>>>> +    FILE *gateway_file = NULL;
>>>>> +
>>>>> +    // Test $IPFS_GATEWAY.
>>>>> +    if (getenv("IPFS_GATEWAY") != NULL) {
>>>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
>> "%s",
>>>>> +                     getenv("IPFS_GATEWAY")) >=
>>>> sizeof(c->gateway_buffer)) {
>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment
>>>> variable exceeds the maximum length. We allow a max of %zu
>> characters\n",
>>>> sizeof(c->gateway_buffer));
>>>>> +            ret = AVERROR(EINVAL);
>>>>> +            goto err;
>>>>> +        }
>>>>> +
>>>>> +        ret = 1;
>>>>> +        goto err;
>>>>> +    } else
>>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
>>>>> +
>>>>> +    // We need to know the IPFS folder to - eventually - read the
>>>> contents of
>>>>> +    // the "gateway" file which would tell us the gateway to use.
>>>>> +    if (getenv("IPFS_PATH") == NULL) {
>>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
>>>>> +
>>>>> +        // Try via the home folder.
>>>>> +        if (getenv("HOME") == NULL) {
>>>>> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be empty.\n");
>>>>> +            ret = AVERROR(EINVAL);
>>>>> +            goto err;
>>>>> +        }
>>>>> +
>>>>> +        // Verify the composed path fits.
>>>>> +        if (snprintf(ipfs_full_data_folder,
>>>> sizeof(ipfs_full_data_folder),
>>>>> +                     "%s/.ipfs/", getenv("HOME")) >=
>>>> sizeof(ipfs_full_data_folder)) {
>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds the
>>>> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
>>>>> +            ret = AVERROR(EINVAL);
>>>>> +            goto err;
>>>>> +        }
>>>>> +
>>>>> +        // Stat the folder.
>>>>> +        // It should exist in a default IPFS setup when run as local
>>>> user.
>>>>> +#ifndef _WIN32
>>>>> +        stat_ret = stat(ipfs_full_data_folder, &st);
>>>>> +#else
>>>>> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
>>>>> +#endif
>>>>> +        if (stat_ret < 0) {
>>>>> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
>>>> tried:\n");
>>>>> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was
>> empty.\n");
>>>>> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s) which
>>>> doesn't exist.\n", ipfs_full_data_folder);
>>>>> +            ret = AVERROR(ENOENT);
>>>>> +            goto err;
>>>>> +        }
>>>>> +    } else {
>>>>> +        if (snprintf(ipfs_full_data_folder,
>>>> sizeof(ipfs_full_data_folder), "%s",
>>>>> +                 getenv("IPFS_PATH")) >=
>> sizeof(ipfs_full_data_folder))
>>>> {
>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
>>>> variable exceeds the maximum length. We allow a max of %zu
>> characters\n",
>>>> sizeof(c->gateway_buffer));
>>>>> +            ret = AVERROR(EINVAL);
>>>>> +            goto err;
>>>>> +        }
>>>>> +
>>>>> +    }
>>>>> +
>>>>> +    // Copy the fully composed gateway path into ipfs_gateway_file.
>>>>> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
>>>> "%sgateway",
>>>>> +                 ipfs_full_data_folder) >= sizeof(ipfs_gateway_file))
>> {
>>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path exceeds
>>>> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
>>>>> +        ret = AVERROR(ENOENT);
>>>>> +        goto err;
>>>>> +    }
>>>>> +
>>>>> +    // Get the contents of the gateway file.
>>>>> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
>>>>> +    if (!gateway_file) {
>>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
>> %s)
>>>> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
>>>>> +        ret = AVERROR(ENOENT);
>>>>> +        goto err;
>>>>> +    }
>>>>> +
>>>>> +    // Read a single line (fgets stops at new line mark).
>>>>> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
>>>> gateway_file);
>>>>> +
>>>>> +    // Replace the last char with \0
>>>>> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;

Btw: fgets normally zero-terminates the buffer (i.e. it reads at most
(sizeof(c->gateway_buffer) - 1) - 1 characters from the stream and adds
a \0. But on a read error it doesn't do this, instead the contents of
the buffer are indeterminate and so should not be used. Adding a zero at
the very end of gateway_buffer does not help to enforce this.

fgets returns NULL on read error and if nothing could be read due to
immediate EOF; so you should check for that.
(Btw: A protocol's context is zeroed initially, so
c->gateway_buffer[sizeof(c->gateway_buffer) - 1] is already zero,
because no one ever wrote anything else there.)

>>>>> +
>>>>> +    // Replace first occurence of end of line with \0
>>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
>>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
>>>>
>>>> If the buffer contains both \r and \n and the first \n precedes the
>>>> first \r, then the above zeroes both the first \r and the first \n. If
>>>> it is enough to only zero the first newline, then this can be simplified
>>>> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
>>>>
>>>
>>> It used to be in your suggested simplified approach.
>>> It was then suggested to split it because of new line differences
>>> on different platforms. This was supposed to catch "everything".
>>>
>>> I prefer to keep this as-is now.
>>>
>>
>> Is it intended to use what is between the first \n and the first \r
>> lateron although this data will be after the '\0' after this? That would
>> be very weird.
>>
> 
> You have to elaborate on that as that sounds really wrong to me.
> 

c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;
overwrites the first \r or \n (whichever is first) with '\0' (if any;
otherwise it overwrites the terminating \0 with \0.

c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
also does this, but in case that the string originally contained both a
\r and a \n with the first \n preceding the first \r it would also zero
the first \r; this position is after the first terminating \0 (those at
the position where the first \n was), so it appeared to me that you
intended to use what is after the first \0. Because that is the only
difference between the approach with two calls to strcspn and the one
with one call to strcspn.

But then I read that this has been suggested to you and found the mail
(https://ffmpeg.org/pipermail/ffmpeg-devel/2022-February/292590.html)
this referred to. There seems to be a misunderstanding in the semantics
of strcspn (most likely a confusion with strstr):
strcspn(c->gateway_buffer, "\r\n") works with single \r and single \n,
there need not be a single "\r\n" substring. Both methods would produce
strings that compare equal according to strcmp().

> If I'm correct a file doesn't start with either \r or \n. Unless you have
> intentional line breaks.
> It ends with those, not starts. I'm just going to ignore \r for now for
> brevity.
> 
> So if you have:
> \n<your gateway>\n
> 
> Would be translated to:
> \0<your gateway>\n
> 
> Then indeed you won't get a gateway because it's intended that your first
> line is that gateway.
> 
> It must be in this format:
> <your gateway>\n
> 
> or without a newline
> <your gateway>
> 
> There is no blank line trimming to fix user screwups.
> Users are not intended to touch that file anyhow. It's auto-generated by
> IPFS and removed once it shuts down.
> 
> 
>>>
>>>>> +
>>>>> +    // If strlen finds anything longer then 0 characters then we have
>> a
>>>>> +    // potential gateway url.
>>>>> +    if (strlen(c->gateway_buffer) < 1) {
>>>>
>>>> if (*c->gateway_buffer == '\0')
>>>>
>>>
>>> Is that a style difference or an actual behavior difference?
>>>
>>
>> The former would call strlen to check whether a string is empty (unless
>> the compiler optimizes it away). No actual behaviour difference exists.
>> But it is nevertheless more than a style difference.
>>
> 
> Fixed locally.
> 
>>
>>>
>>>>
>>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
>> %s)
>>>> appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
>>>>> +        ret = AVERROR(EILSEQ);
>>>>> +        goto err;
>>>>> +    } else {
>>>>> +        // We're done, the c->gateway_buffer has something that looks
>>>> valid.
>>>>> +        ret = 1;
>>>>> +        goto err;
>>>>> +    }
>>>>> +
>>>>> +err:
>>>>> +    if (gateway_file)
>>>>> +        fclose(gateway_file);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static int translate_ipfs_to_http(URLContext *h, const char *uri,
>>>>> +                                  int flags, AVDictionary **options)
>>>>> +{
>>>>> +    const char *ipfs_cid;
>>>>> +    char *fulluri = NULL;
>>>>> +    int ret;
>>>>> +    IPFSGatewayContext *c = h->priv_data;
>>>>> +
>>>>> +    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is
>>>> stripped from
>>>>> +    // the string leaving just the CID in ipfs_cid.
>>>>> +    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
>>>>> +    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
>>>>> +
>>>>> +    // We must have either ipns or ipfs.
>>>>> +    if (!is_ipfs && !is_ipns) {
>>>>> +        ret = AVERROR(EINVAL);
>>>>> +        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
>>>>> +        goto err;
>>>>> +    }
>>>>> +
>>>>> +    // If the CID has a length greater then 0 then we assume we have a
>>>> proper working one.
>>>>> +    // It could still be wrong but in that case the gateway should
>> save
>>>> us and
>>>>> +    // ruturn a 403 error. The http protocol handles this.
>>>>> +    if (strlen(ipfs_cid) < 1) {
>>>>> +        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
>>>>> +        ret = AVERROR(EILSEQ);
>>>>> +        goto err;
>>>>> +    }
>>>>> +
>>>>> +    // Populate c->gateway_buffer with whatever is in c->gateway
>>>>> +    if (c->gateway != NULL) {
>>>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
>> "%s",
>>>>> +                     c->gateway) >= sizeof(c->gateway_buffer)) {
>>>>> +            av_log(h, AV_LOG_WARNING, "The -gateway parameter is too
>>>> long. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
>>>>
>>>> We typically use SIZE_SPECIFIER instead of z for compatibility with
>>>> ancient versions of MSVC.
>>>> (I don't know whether there is any supported version of MSVC that
>>>> doesn't support z; I don't use MSVC myself.)
>>>>
>>>
>>> Here too I was explicitly suggested to use %zu (when I was using - I
>> think
>>> - %lu before).
>>> So I assume that the ancient MSVC version you're referring to is probably
>>> not supported anymore from an ffmpeg compiler requirement point of view?
>>>
>>> Should i now change all "%zu" to "%"SIZE_SPECIFIER (this does not make it
>>> neater nor shorter with the 80 char line limit).
>>> Is this change required?
>>
>> As said: It is for compatibility with ancient versions of MSVC. But I
>> don't know whether any of the actually supported versions of MSVC still
>> need it.
>>
>>>
>>> If it is, what _exactly_ do i need to change it in? I see a
>>> couple different SIZE_SPECIFIER prefixes. I have no clue what to use
>> here.
>>
>> There is only one SIZE_SPECIFIER, namely SIZE_SPECIFIER; the
>> PTRDIFF_SPECIFIER (or whatever you see) is obviously not the thing to
>> use for size_t.
>>
>> I see some:
> %"SIZE_SPECIFIER"
> 
> But also like this:
> %8"SIZE_SPECIFIER
> %5"SIZE_SPECIFIER
> 
> I'm assuming they are for clipping?

printf specifiers consist of flags, minimum field width, precision
(indicated by '.'), length modifier and conversion specifier (in that
order). Like the macros in inttypes.h SIZE_SPECIFIER expands to a
character string literal that includes a length modifier and a
conversion specifier, so that one can use this together with the former
three possibilities.
Of course, it would make no sense to use any of these three here, so it
would just be %"SIZE_SPECIFIER".

> 
> I really don't like having to change it to this as it:
> - Uglifies the code (and i doubt if it's even needed)

I agree to both points. Which is why I am not forcing you to do this. It
seems that there is already at least one commonly compiled instance
where it is not used at all (libavformat/argo_cvg.c).

> - Directly contradicts earlier feedback

I don't agree with that given that SIZE_SPECIFIER is basically just a
wrapper around zu.

> - And due to the above gives me a feeling of wasting time
> - Fine to change it but I hope there isn't another person popping up with a
> question to change it yet again....
> 
> 
>>>
>>>
>>>>> +            ret = AVERROR(EINVAL);
>>>>> +            goto err;
>>>>> +        }
>>>>> +    } else {
>>>>> +        // Populate the IPFS gateway if we have any.
>>>>> +        // If not, inform the user how to properly set one.
>>>>> +        ret = populate_ipfs_gateway(h);
>>>>> +
>>>>> +        if (ret < 1) {
>>>>> +            // We fallback on dweb.link (managed by Protocol Labs).
>>>>> +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "
>>>> https://dweb.link");
>>>>> +
>>>>> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
>>>> running. You’re now using the public gateway at dweb.link.\n");
>>>>> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
>>>> recommended to improve performance and reliability, and not share all
>> your
>>>> activity with a single IPFS gateway.\n");
>>>>> +            av_log(h, AV_LOG_INFO, "There are multiple options to
>>>> define this gateway.\n");
>>>>> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
>>>> param, without a trailing slash: -gateway <url>.\n");
>>>>> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
>>>> environment variable with the full HTTP URL to the gateway without
>> trailing
>>>> forward slash.\n");
>>>>> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH
>> environment
>>>> variable and point it to the IPFS data path - this is typically
>> ~/.ipfs\n");
>>>>
>>>> All those AV_LOG_INFO can be combined which has the advantage that the
>>>> logs can't be teared apart (which they can now if something else logs at
>>>> the same time); furthermore, this would also decrease codesize.
>>>>
>>>
>>> Do you have an example of where that's happening?
>>>
>>
>> It can happen any time you have multiple threads using av_log at the
>> same time. Given that your statements are supposed to be full lines, it
>> is not that bad if it happens here, but it is nevertheless suboptimal.
>>
> 
> I meant a multi-line av_log example ;) But yeah, I get how threading could
> screw this up.
> 

I meant something like:
av_log(h, AV_LOG_INFO, "Installing IPFS locally is recommended to
improve performance and reliability, and not share all your activity
with a single IPFS gateway.\n"
       "There are multiple options to define this gateway.\n"
       "1. Call ffmpeg with a gateway param, without a trailing slash:
-gateway <url>.\n"
       "2. Define an $IPFS_GATEWAY environment variable with the full
HTTP URL to the gateway without trailing forward slash.\n"
       "3. Define an $IPFS_PATH environment variable and point it to the
IPFS data path - this is typically ~/.ipfs\n");

(These lines are actually way too long.)

- Andreas
Mark Gaiser March 31, 2022, 10:17 p.m. UTC | #7
On Thu, Mar 31, 2022 at 11:44 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Mark Gaiser:
> > On Wed, Mar 30, 2022 at 3:57 PM Andreas Rheinhardt <
> > andreas.rheinhardt@outlook.com> wrote:
> >
> >> Mark Gaiser:
> >>> On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
> >>> andreas.rheinhardt@outlook.com> wrote:
> >>>
> >>>> Mark Gaiser:
> >>>>> This patch adds support for:
> >>>>> - ffplay ipfs://<cid>
> >>>>> - ffplay ipns://<cid>
> >>>>>
> >>>>> IPFS data can be played from so called "ipfs gateways".
> >>>>> A gateway is essentially a webserver that gives access to the
> >>>>> distributed IPFS network.
> >>>>>
> >>>>> This protocol support (ipfs and ipns) therefore translates
> >>>>> ipfs:// and ipns:// to a http:// url. This resulting url is
> >>>>> then handled by the http protocol. It could also be https
> >>>>> depending on the gateway provided.
> >>>>>
> >>>>> To use this protocol, a gateway must be provided.
> >>>>> If you do nothing it will try to find it in your
> >>>>> $HOME/.ipfs/gateway file. The ways to set it manually are:
> >>>>> 1. Define a -gateway <url> to the gateway.
> >>>>> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
> >>>>> 3. Define $IPFS_PATH and point it to the IPFS data path.
> >>>>> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
> >>>>>
> >>>>> Signed-off-by: Mark Gaiser <markg85@gmail.com>
> >>>>> ---
> >>>>>  configure                 |   2 +
> >>>>>  doc/protocols.texi        |  30 ++++
> >>>>>  libavformat/Makefile      |   2 +
> >>>>>  libavformat/ipfsgateway.c | 309
> ++++++++++++++++++++++++++++++++++++++
> >>>>>  libavformat/protocols.c   |   2 +
> >>>>>  5 files changed, 345 insertions(+)
> >>>>>  create mode 100644 libavformat/ipfsgateway.c
> >>>>>
> >>>>> diff --git a/configure b/configure
> >>>>> index e4d36aa639..55af90957a 100755
> >>>>> --- a/configure
> >>>>> +++ b/configure
> >>>>> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
> >>>>>  udplite_protocol_select="network"
> >>>>>  unix_protocol_deps="sys_un_h"
> >>>>>  unix_protocol_select="network"
> >>>>> +ipfs_protocol_select="https_protocol"
> >>>>> +ipns_protocol_select="https_protocol"
> >>>>>
> >>>>>  # external library protocols
> >>>>>  libamqp_protocol_deps="librabbitmq"
> >>>>> diff --git a/doc/protocols.texi b/doc/protocols.texi
> >>>>> index d207df0b52..7c9c0a4808 100644
> >>>>> --- a/doc/protocols.texi
> >>>>> +++ b/doc/protocols.texi
> >>>>> @@ -2025,5 +2025,35 @@ decoding errors.
> >>>>>
> >>>>>  @end table
> >>>>>
> >>>>> +@section ipfs
> >>>>> +
> >>>>> +InterPlanetary File System (IPFS) protocol support. One can access
> >>>> files stored
> >>>>> +on the IPFS network through so called gateways. Those are http(s)
> >>>> endpoints.
> >>>>> +This protocol wraps the IPFS native protocols (ipfs:// and ipns://)
> to
> >>>> be send
> >>>>> +to such a gateway. Users can (and should) host their own node which
> >>>> means this
> >>>>> +protocol will use your local machine gateway to access files on the
> >>>> IPFS network.
> >>>>> +
> >>>>> +If a user doesn't have a node of their own then the public gateway
> >>>> dweb.link is
> >>>>> +used by default.
> >>>>> +
> >>>>> +You can use this protocol in 2 ways. Using IPFS:
> >>>>> +@example
> >>>>> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> >>>>> +@end example
> >>>>> +
> >>>>> +Or the IPNS protocol (IPNS is mutable IPFS):
> >>>>> +@example
> >>>>> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> >>>>> +@end example
> >>>>> +
> >>>>> +You can also change the gateway to be used:
> >>>>> +
> >>>>> +@table @option
> >>>>> +
> >>>>> +@item gateway
> >>>>> +Defines the gateway to use. When nothing is provided the protocol
> will
> >>>> first try
> >>>>> +your local gateway. If that fails dweb.link will be used.
> >>>>> +
> >>>>> +@end table
> >>>>>
> >>>>>  @c man end PROTOCOLS
> >>>>> diff --git a/libavformat/Makefile b/libavformat/Makefile
> >>>>> index d7182d6bd8..e3233fd7ac 100644
> >>>>> --- a/libavformat/Makefile
> >>>>> +++ b/libavformat/Makefile
> >>>>> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
> >>>> srtpproto.o srtp.o
> >>>>>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
> >>>>>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
> >>>>>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
> >>>>> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
> >>>>> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
> >>>>>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
> >>>>>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
> >>>>>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
> >>>>> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
> >>>>> new file mode 100644
> >>>>> index 0000000000..1a039589c0
> >>>>> --- /dev/null
> >>>>> +++ b/libavformat/ipfsgateway.c
> >>>>> @@ -0,0 +1,309 @@
> >>>>> +/*
> >>>>> + * IPFS and IPNS protocol support through IPFS Gateway.
> >>>>> + * Copyright (c) 2022 Mark Gaiser
> >>>>> + *
> >>>>> + * This file is part of FFmpeg.
> >>>>> + *
> >>>>> + * FFmpeg is free software; you can redistribute it and/or
> >>>>> + * modify it under the terms of the GNU Lesser General Public
> >>>>> + * License as published by the Free Software Foundation; either
> >>>>> + * version 2.1 of the License, or (at your option) any later
> version.
> >>>>> + *
> >>>>> + * FFmpeg is distributed in the hope that it will be useful,
> >>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>>>> + * Lesser General Public License for more details.
> >>>>> + *
> >>>>> + * You should have received a copy of the GNU Lesser General Public
> >>>>> + * License along with FFmpeg; if not, write to the Free Software
> >>>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> >>>> 02110-1301 USA
> >>>>> + */
> >>>>> +
> >>>>> +#include "avformat.h"
> >>>>> +#include "libavutil/avassert.h"
> >>>>
> >>>> Unused.
> >>>>
> >>>>> +#include "libavutil/avstring.h"
> >>>>> +#include "libavutil/internal.h"
> >>>>> +#include "libavutil/opt.h"
> >>>>> +#include "libavutil/tree.h"
> >>>>
> >>>> ?
> >>>>
> >>>
> >>> This whole include part can be cleaned up much more.
> >>> Just having these:
> >>> #include "libavutil/avstring.h"
> >>> #include "libavutil/opt.h"
> >>> #include "url.h"
> >>> #include <sys/stat.h>
> >>>
> >>> is enough.
> >>>
> >>>
> >>>>> +#include <fcntl.h>
> >>>>> +#if HAVE_IO_H
> >>>>> +#include <io.h>
> >>>>> +#endif
> >>>>> +#if HAVE_UNISTD_H
> >>>>> +#include <unistd.h>
> >>>>> +#endif
> >>>>> +#include "os_support.h"
> >>>>> +#include "url.h"
> >>>>> +#include <stdlib.h>
> >>>>> +#include <sys/stat.h>
> >>>>> +
> >>>>> +typedef struct IPFSGatewayContext {
> >>>>> +    AVClass *class;
> >>>>> +    URLContext *inner;
> >>>>> +    // Is filled by the -gateway argument and not changed after.
> >>>>> +    char *gateway;
> >>>>> +    // If the above gateway is non null, it will be copied into this
> >>>> buffer.
> >>>>> +    // Else this buffer will contain the auto detected gateway.
> >>>>> +    // In either case, the gateway to use will be in this buffer.
> >>>>> +    char gateway_buffer[PATH_MAX];
> >>>>> +} IPFSGatewayContext;
> >>>>> +
> >>>>> +// A best-effort way to find the IPFS gateway.
> >>>>> +// Only the most appropiate gateway is set. It's not actually
> >> requested
> >>>>> +// (http call) to prevent a potential slowdown in startup. A
> potential
> >>>> timeout
> >>>>> +// is handled by the HTTP protocol.
> >>>>> +static int populate_ipfs_gateway(URLContext *h)
> >>>>> +{
> >>>>> +    IPFSGatewayContext *c = h->priv_data;
> >>>>> +    char ipfs_full_data_folder[PATH_MAX];
> >>>>> +    char ipfs_gateway_file[PATH_MAX];
> >>>>> +    struct stat st;
> >>>>> +    int stat_ret = 0;
> >>>>> +    int ret = AVERROR(EINVAL);
> >>>>> +    FILE *gateway_file = NULL;
> >>>>> +
> >>>>> +    // Test $IPFS_GATEWAY.
> >>>>> +    if (getenv("IPFS_GATEWAY") != NULL) {
> >>>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
> >> "%s",
> >>>>> +                     getenv("IPFS_GATEWAY")) >=
> >>>> sizeof(c->gateway_buffer)) {
> >>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment
> >>>> variable exceeds the maximum length. We allow a max of %zu
> >> characters\n",
> >>>> sizeof(c->gateway_buffer));
> >>>>> +            ret = AVERROR(EINVAL);
> >>>>> +            goto err;
> >>>>> +        }
> >>>>> +
> >>>>> +        ret = 1;
> >>>>> +        goto err;
> >>>>> +    } else
> >>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
> >>>>> +
> >>>>> +    // We need to know the IPFS folder to - eventually - read the
> >>>> contents of
> >>>>> +    // the "gateway" file which would tell us the gateway to use.
> >>>>> +    if (getenv("IPFS_PATH") == NULL) {
> >>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
> >>>>> +
> >>>>> +        // Try via the home folder.
> >>>>> +        if (getenv("HOME") == NULL) {
> >>>>> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be
> empty.\n");
> >>>>> +            ret = AVERROR(EINVAL);
> >>>>> +            goto err;
> >>>>> +        }
> >>>>> +
> >>>>> +        // Verify the composed path fits.
> >>>>> +        if (snprintf(ipfs_full_data_folder,
> >>>> sizeof(ipfs_full_data_folder),
> >>>>> +                     "%s/.ipfs/", getenv("HOME")) >=
> >>>> sizeof(ipfs_full_data_folder)) {
> >>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds
> the
> >>>> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
> >>>>> +            ret = AVERROR(EINVAL);
> >>>>> +            goto err;
> >>>>> +        }
> >>>>> +
> >>>>> +        // Stat the folder.
> >>>>> +        // It should exist in a default IPFS setup when run as local
> >>>> user.
> >>>>> +#ifndef _WIN32
> >>>>> +        stat_ret = stat(ipfs_full_data_folder, &st);
> >>>>> +#else
> >>>>> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
> >>>>> +#endif
> >>>>> +        if (stat_ret < 0) {
> >>>>> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
> >>>> tried:\n");
> >>>>> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was
> >> empty.\n");
> >>>>> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s)
> which
> >>>> doesn't exist.\n", ipfs_full_data_folder);
> >>>>> +            ret = AVERROR(ENOENT);
> >>>>> +            goto err;
> >>>>> +        }
> >>>>> +    } else {
> >>>>> +        if (snprintf(ipfs_full_data_folder,
> >>>> sizeof(ipfs_full_data_folder), "%s",
> >>>>> +                 getenv("IPFS_PATH")) >=
> >> sizeof(ipfs_full_data_folder))
> >>>> {
> >>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
> >>>> variable exceeds the maximum length. We allow a max of %zu
> >> characters\n",
> >>>> sizeof(c->gateway_buffer));
> >>>>> +            ret = AVERROR(EINVAL);
> >>>>> +            goto err;
> >>>>> +        }
> >>>>> +
> >>>>> +    }
> >>>>> +
> >>>>> +    // Copy the fully composed gateway path into ipfs_gateway_file.
> >>>>> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
> >>>> "%sgateway",
> >>>>> +                 ipfs_full_data_folder) >=
> sizeof(ipfs_gateway_file))
> >> {
> >>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path
> exceeds
> >>>> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
> >>>>> +        ret = AVERROR(ENOENT);
> >>>>> +        goto err;
> >>>>> +    }
> >>>>> +
> >>>>> +    // Get the contents of the gateway file.
> >>>>> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
> >>>>> +    if (!gateway_file) {
> >>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
> >> %s)
> >>>> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
> >>>>> +        ret = AVERROR(ENOENT);
> >>>>> +        goto err;
> >>>>> +    }
> >>>>> +
> >>>>> +    // Read a single line (fgets stops at new line mark).
> >>>>> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
> >>>> gateway_file);
> >>>>> +
> >>>>> +    // Replace the last char with \0
> >>>>> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
>
> Btw: fgets normally zero-terminates the buffer (i.e. it reads at most
> (sizeof(c->gateway_buffer) - 1) - 1 characters from the stream and adds
> a \0. But on a read error it doesn't do this, instead the contents of
> the buffer are indeterminate and so should not be used. Adding a zero at
> the very end of gateway_buffer does not help to enforce this.


> fgets returns NULL on read error and if nothing could be read due to
> immediate EOF; so you should check for that.
> (Btw: A protocol's context is zeroed initially, so
> c->gateway_buffer[sizeof(c->gateway_buffer) - 1] is already zero,
> because no one ever wrote anything else there.)
>

Nice one! Nobody caught that before.
So checking on NULL for fgets and removing the line to add a 0 at the end
is the appropriate fix here I assume.

Changed to what I assumed. Please correct me if I'm wrong.


>
> >>>>> +
> >>>>> +    // Replace first occurence of end of line with \0
> >>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
> >>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
> >>>>
> >>>> If the buffer contains both \r and \n and the first \n precedes the
> >>>> first \r, then the above zeroes both the first \r and the first \n. If
> >>>> it is enough to only zero the first newline, then this can be
> simplified
> >>>> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
> >>>>
> >>>
> >>> It used to be in your suggested simplified approach.
> >>> It was then suggested to split it because of new line differences
> >>> on different platforms. This was supposed to catch "everything".
> >>>
> >>> I prefer to keep this as-is now.
> >>>
> >>
> >> Is it intended to use what is between the first \n and the first \r
> >> lateron although this data will be after the '\0' after this? That would
> >> be very weird.
> >>
> >
> > You have to elaborate on that as that sounds really wrong to me.
> >
>
> c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;
> overwrites the first \r or \n (whichever is first) with '\0' (if any;
> otherwise it overwrites the terminating \0 with \0.
>
> c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
> c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
> also does this, but in case that the string originally contained both a
> \r and a \n with the first \n preceding the first \r it would also zero
> the first \r; this position is after the first terminating \0 (those at
> the position where the first \n was), so it appeared to me that you
> intended to use what is after the first \0. Because that is the only
> difference between the approach with two calls to strcspn and the one
> with one call to strcspn.
>
> But then I read that this has been suggested to you and found the mail
> (https://ffmpeg.org/pipermail/ffmpeg-devel/2022-February/292590.html)
> this referred to. There seems to be a misunderstanding in the semantics
> of strcspn (most likely a confusion with strstr):
> strcspn(c->gateway_buffer, "\r\n") works with single \r and single \n,
> there need not be a single "\r\n" substring. Both methods would produce
> strings that compare equal according to strcmp().
>
>
Thank you for your elaborate explanation here!
But now I'm not sure what the resolution - if any - should be?


> > If I'm correct a file doesn't start with either \r or \n. Unless you have
> > intentional line breaks.
> > It ends with those, not starts. I'm just going to ignore \r for now for
> > brevity.
> >
> > So if you have:
> > \n<your gateway>\n
> >
> > Would be translated to:
> > \0<your gateway>\n
> >
> > Then indeed you won't get a gateway because it's intended that your first
> > line is that gateway.
> >
> > It must be in this format:
> > <your gateway>\n
> >
> > or without a newline
> > <your gateway>
> >
> > There is no blank line trimming to fix user screwups.
> > Users are not intended to touch that file anyhow. It's auto-generated by
> > IPFS and removed once it shuts down.
> >
> >
> >>>
> >>>>> +
> >>>>> +    // If strlen finds anything longer then 0 characters then we
> have
> >> a
> >>>>> +    // potential gateway url.
> >>>>> +    if (strlen(c->gateway_buffer) < 1) {
> >>>>
> >>>> if (*c->gateway_buffer == '\0')
> >>>>
> >>>
> >>> Is that a style difference or an actual behavior difference?
> >>>
> >>
> >> The former would call strlen to check whether a string is empty (unless
> >> the compiler optimizes it away). No actual behaviour difference exists.
> >> But it is nevertheless more than a style difference.
> >>
> >
> > Fixed locally.
> >
> >>
> >>>
> >>>>
> >>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
> >> %s)
> >>>> appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
> >>>>> +        ret = AVERROR(EILSEQ);
> >>>>> +        goto err;
> >>>>> +    } else {
> >>>>> +        // We're done, the c->gateway_buffer has something that
> looks
> >>>> valid.
> >>>>> +        ret = 1;
> >>>>> +        goto err;
> >>>>> +    }
> >>>>> +
> >>>>> +err:
> >>>>> +    if (gateway_file)
> >>>>> +        fclose(gateway_file);
> >>>>> +
> >>>>> +    return ret;
> >>>>> +}
> >>>>> +
> >>>>> +static int translate_ipfs_to_http(URLContext *h, const char *uri,
> >>>>> +                                  int flags, AVDictionary **options)
> >>>>> +{
> >>>>> +    const char *ipfs_cid;
> >>>>> +    char *fulluri = NULL;
> >>>>> +    int ret;
> >>>>> +    IPFSGatewayContext *c = h->priv_data;
> >>>>> +
> >>>>> +    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is
> >>>> stripped from
> >>>>> +    // the string leaving just the CID in ipfs_cid.
> >>>>> +    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
> >>>>> +    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
> >>>>> +
> >>>>> +    // We must have either ipns or ipfs.
> >>>>> +    if (!is_ipfs && !is_ipns) {
> >>>>> +        ret = AVERROR(EINVAL);
> >>>>> +        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
> >>>>> +        goto err;
> >>>>> +    }
> >>>>> +
> >>>>> +    // If the CID has a length greater then 0 then we assume we
> have a
> >>>> proper working one.
> >>>>> +    // It could still be wrong but in that case the gateway should
> >> save
> >>>> us and
> >>>>> +    // ruturn a 403 error. The http protocol handles this.
> >>>>> +    if (strlen(ipfs_cid) < 1) {
> >>>>> +        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
> >>>>> +        ret = AVERROR(EILSEQ);
> >>>>> +        goto err;
> >>>>> +    }
> >>>>> +
> >>>>> +    // Populate c->gateway_buffer with whatever is in c->gateway
> >>>>> +    if (c->gateway != NULL) {
> >>>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
> >> "%s",
> >>>>> +                     c->gateway) >= sizeof(c->gateway_buffer)) {
> >>>>> +            av_log(h, AV_LOG_WARNING, "The -gateway parameter is too
> >>>> long. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
> >>>>
> >>>> We typically use SIZE_SPECIFIER instead of z for compatibility with
> >>>> ancient versions of MSVC.
> >>>> (I don't know whether there is any supported version of MSVC that
> >>>> doesn't support z; I don't use MSVC myself.)
> >>>>
> >>>
> >>> Here too I was explicitly suggested to use %zu (when I was using - I
> >> think
> >>> - %lu before).
> >>> So I assume that the ancient MSVC version you're referring to is
> probably
> >>> not supported anymore from an ffmpeg compiler requirement point of
> view?
> >>>
> >>> Should i now change all "%zu" to "%"SIZE_SPECIFIER (this does not make
> it
> >>> neater nor shorter with the 80 char line limit).
> >>> Is this change required?
> >>
> >> As said: It is for compatibility with ancient versions of MSVC. But I
> >> don't know whether any of the actually supported versions of MSVC still
> >> need it.
> >>
> >>>
> >>> If it is, what _exactly_ do i need to change it in? I see a
> >>> couple different SIZE_SPECIFIER prefixes. I have no clue what to use
> >> here.
> >>
> >> There is only one SIZE_SPECIFIER, namely SIZE_SPECIFIER; the
> >> PTRDIFF_SPECIFIER (or whatever you see) is obviously not the thing to
> >> use for size_t.
> >>
> >> I see some:
> > %"SIZE_SPECIFIER"
> >
> > But also like this:
> > %8"SIZE_SPECIFIER
> > %5"SIZE_SPECIFIER
> >
> > I'm assuming they are for clipping?
>
> printf specifiers consist of flags, minimum field width, precision
> (indicated by '.'), length modifier and conversion specifier (in that
> order). Like the macros in inttypes.h SIZE_SPECIFIER expands to a
> character string literal that includes a length modifier and a
> conversion specifier, so that one can use this together with the former
> three possibilities.
> Of course, it would make no sense to use any of these three here, so it
> would just be %"SIZE_SPECIFIER".
>
> >
> > I really don't like having to change it to this as it:
> > - Uglifies the code (and i doubt if it's even needed)
>
> I agree to both points. Which is why I am not forcing you to do this. It
> seems that there is already at least one commonly compiled instance
> where it is not used at all (libavformat/argo_cvg.c).
>

So i leave it as "%zu". Check.
I did also grep on the entire ffmpeg source for this. Both seems to be used
in various places though granted, SIZE_SPECIFIER is used more.

>
> > - Directly contradicts earlier feedback
>
> I don't agree with that given that SIZE_SPECIFIER is basically just a
> wrapper around zu.
>

My mistake, it doesn't contradict it. Sorry for that.

>
> > - And due to the above gives me a feeling of wasting time
> > - Fine to change it but I hope there isn't another person popping up
> with a
> > question to change it yet again....
> >
> >
> >>>
> >>>
> >>>>> +            ret = AVERROR(EINVAL);
> >>>>> +            goto err;
> >>>>> +        }
> >>>>> +    } else {
> >>>>> +        // Populate the IPFS gateway if we have any.
> >>>>> +        // If not, inform the user how to properly set one.
> >>>>> +        ret = populate_ipfs_gateway(h);
> >>>>> +
> >>>>> +        if (ret < 1) {
> >>>>> +            // We fallback on dweb.link (managed by Protocol Labs).
> >>>>> +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "
> >>>> https://dweb.link");
> >>>>> +
> >>>>> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
> >>>> running. You’re now using the public gateway at dweb.link.\n");
> >>>>> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
> >>>> recommended to improve performance and reliability, and not share all
> >> your
> >>>> activity with a single IPFS gateway.\n");
> >>>>> +            av_log(h, AV_LOG_INFO, "There are multiple options to
> >>>> define this gateway.\n");
> >>>>> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
> >>>> param, without a trailing slash: -gateway <url>.\n");
> >>>>> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
> >>>> environment variable with the full HTTP URL to the gateway without
> >> trailing
> >>>> forward slash.\n");
> >>>>> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH
> >> environment
> >>>> variable and point it to the IPFS data path - this is typically
> >> ~/.ipfs\n");
> >>>>
> >>>> All those AV_LOG_INFO can be combined which has the advantage that the
> >>>> logs can't be teared apart (which they can now if something else logs
> at
> >>>> the same time); furthermore, this would also decrease codesize.
> >>>>
> >>>
> >>> Do you have an example of where that's happening?
> >>>
> >>
> >> It can happen any time you have multiple threads using av_log at the
> >> same time. Given that your statements are supposed to be full lines, it
> >> is not that bad if it happens here, but it is nevertheless suboptimal.
> >>
> >
> > I meant a multi-line av_log example ;) But yeah, I get how threading
> could
> > screw this up.
> >
>
> I meant something like:
> av_log(h, AV_LOG_INFO, "Installing IPFS locally is recommended to
> improve performance and reliability, and not share all your activity
> with a single IPFS gateway.\n"
>        "There are multiple options to define this gateway.\n"
>        "1. Call ffmpeg with a gateway param, without a trailing slash:
> -gateway <url>.\n"
>        "2. Define an $IPFS_GATEWAY environment variable with the full
> HTTP URL to the gateway without trailing forward slash.\n"
>        "3. Define an $IPFS_PATH environment variable and point it to the
> IPFS data path - this is typically ~/.ipfs\n");
>
> (These lines are actually way too long.)
>

Fixed as multiline.
I do like to keep the content of the lines though.
I also checked this with IPFS folks to be as correct and clear as we could
make it.

>
> - Andreas
>
Mark Gaiser March 31, 2022, 10:25 p.m. UTC | #8
On Fri, Apr 1, 2022 at 12:17 AM Mark Gaiser <markg85@gmail.com> wrote:

> On Thu, Mar 31, 2022 at 11:44 PM Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com> wrote:
>
>> Mark Gaiser:
>> > On Wed, Mar 30, 2022 at 3:57 PM Andreas Rheinhardt <
>> > andreas.rheinhardt@outlook.com> wrote:
>> >
>> >> Mark Gaiser:
>> >>> On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
>> >>> andreas.rheinhardt@outlook.com> wrote:
>> >>>
>> >>>> Mark Gaiser:
>> >>>>> This patch adds support for:
>> >>>>> - ffplay ipfs://<cid>
>> >>>>> - ffplay ipns://<cid>
>> >>>>>
>> >>>>> IPFS data can be played from so called "ipfs gateways".
>> >>>>> A gateway is essentially a webserver that gives access to the
>> >>>>> distributed IPFS network.
>> >>>>>
>> >>>>> This protocol support (ipfs and ipns) therefore translates
>> >>>>> ipfs:// and ipns:// to a http:// url. This resulting url is
>> >>>>> then handled by the http protocol. It could also be https
>> >>>>> depending on the gateway provided.
>> >>>>>
>> >>>>> To use this protocol, a gateway must be provided.
>> >>>>> If you do nothing it will try to find it in your
>> >>>>> $HOME/.ipfs/gateway file. The ways to set it manually are:
>> >>>>> 1. Define a -gateway <url> to the gateway.
>> >>>>> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
>> >>>>> 3. Define $IPFS_PATH and point it to the IPFS data path.
>> >>>>> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
>> >>>>>
>> >>>>> Signed-off-by: Mark Gaiser <markg85@gmail.com>
>> >>>>> ---
>> >>>>>  configure                 |   2 +
>> >>>>>  doc/protocols.texi        |  30 ++++
>> >>>>>  libavformat/Makefile      |   2 +
>> >>>>>  libavformat/ipfsgateway.c | 309
>> ++++++++++++++++++++++++++++++++++++++
>> >>>>>  libavformat/protocols.c   |   2 +
>> >>>>>  5 files changed, 345 insertions(+)
>> >>>>>  create mode 100644 libavformat/ipfsgateway.c
>> >>>>>
>> >>>>> diff --git a/configure b/configure
>> >>>>> index e4d36aa639..55af90957a 100755
>> >>>>> --- a/configure
>> >>>>> +++ b/configure
>> >>>>> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
>> >>>>>  udplite_protocol_select="network"
>> >>>>>  unix_protocol_deps="sys_un_h"
>> >>>>>  unix_protocol_select="network"
>> >>>>> +ipfs_protocol_select="https_protocol"
>> >>>>> +ipns_protocol_select="https_protocol"
>> >>>>>
>> >>>>>  # external library protocols
>> >>>>>  libamqp_protocol_deps="librabbitmq"
>> >>>>> diff --git a/doc/protocols.texi b/doc/protocols.texi
>> >>>>> index d207df0b52..7c9c0a4808 100644
>> >>>>> --- a/doc/protocols.texi
>> >>>>> +++ b/doc/protocols.texi
>> >>>>> @@ -2025,5 +2025,35 @@ decoding errors.
>> >>>>>
>> >>>>>  @end table
>> >>>>>
>> >>>>> +@section ipfs
>> >>>>> +
>> >>>>> +InterPlanetary File System (IPFS) protocol support. One can access
>> >>>> files stored
>> >>>>> +on the IPFS network through so called gateways. Those are http(s)
>> >>>> endpoints.
>> >>>>> +This protocol wraps the IPFS native protocols (ipfs:// and
>> ipns://) to
>> >>>> be send
>> >>>>> +to such a gateway. Users can (and should) host their own node which
>> >>>> means this
>> >>>>> +protocol will use your local machine gateway to access files on the
>> >>>> IPFS network.
>> >>>>> +
>> >>>>> +If a user doesn't have a node of their own then the public gateway
>> >>>> dweb.link is
>> >>>>> +used by default.
>> >>>>> +
>> >>>>> +You can use this protocol in 2 ways. Using IPFS:
>> >>>>> +@example
>> >>>>> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>> >>>>> +@end example
>> >>>>> +
>> >>>>> +Or the IPNS protocol (IPNS is mutable IPFS):
>> >>>>> +@example
>> >>>>> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>> >>>>> +@end example
>> >>>>> +
>> >>>>> +You can also change the gateway to be used:
>> >>>>> +
>> >>>>> +@table @option
>> >>>>> +
>> >>>>> +@item gateway
>> >>>>> +Defines the gateway to use. When nothing is provided the protocol
>> will
>> >>>> first try
>> >>>>> +your local gateway. If that fails dweb.link will be used.
>> >>>>> +
>> >>>>> +@end table
>> >>>>>
>> >>>>>  @c man end PROTOCOLS
>> >>>>> diff --git a/libavformat/Makefile b/libavformat/Makefile
>> >>>>> index d7182d6bd8..e3233fd7ac 100644
>> >>>>> --- a/libavformat/Makefile
>> >>>>> +++ b/libavformat/Makefile
>> >>>>> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
>> >>>> srtpproto.o srtp.o
>> >>>>>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
>> >>>>>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
>> >>>>>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
>> >>>>> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
>> >>>>> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
>> >>>>>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
>> >>>>>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
>> >>>>>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
>> >>>>> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
>> >>>>> new file mode 100644
>> >>>>> index 0000000000..1a039589c0
>> >>>>> --- /dev/null
>> >>>>> +++ b/libavformat/ipfsgateway.c
>> >>>>> @@ -0,0 +1,309 @@
>> >>>>> +/*
>> >>>>> + * IPFS and IPNS protocol support through IPFS Gateway.
>> >>>>> + * Copyright (c) 2022 Mark Gaiser
>> >>>>> + *
>> >>>>> + * This file is part of FFmpeg.
>> >>>>> + *
>> >>>>> + * FFmpeg is free software; you can redistribute it and/or
>> >>>>> + * modify it under the terms of the GNU Lesser General Public
>> >>>>> + * License as published by the Free Software Foundation; either
>> >>>>> + * version 2.1 of the License, or (at your option) any later
>> version.
>> >>>>> + *
>> >>>>> + * FFmpeg is distributed in the hope that it will be useful,
>> >>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> >>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> GNU
>> >>>>> + * Lesser General Public License for more details.
>> >>>>> + *
>> >>>>> + * You should have received a copy of the GNU Lesser General Public
>> >>>>> + * License along with FFmpeg; if not, write to the Free Software
>> >>>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> >>>> 02110-1301 USA
>> >>>>> + */
>> >>>>> +
>> >>>>> +#include "avformat.h"
>> >>>>> +#include "libavutil/avassert.h"
>> >>>>
>> >>>> Unused.
>> >>>>
>> >>>>> +#include "libavutil/avstring.h"
>> >>>>> +#include "libavutil/internal.h"
>> >>>>> +#include "libavutil/opt.h"
>> >>>>> +#include "libavutil/tree.h"
>> >>>>
>> >>>> ?
>> >>>>
>> >>>
>> >>> This whole include part can be cleaned up much more.
>> >>> Just having these:
>> >>> #include "libavutil/avstring.h"
>> >>> #include "libavutil/opt.h"
>> >>> #include "url.h"
>> >>> #include <sys/stat.h>
>> >>>
>> >>> is enough.
>> >>>
>> >>>
>> >>>>> +#include <fcntl.h>
>> >>>>> +#if HAVE_IO_H
>> >>>>> +#include <io.h>
>> >>>>> +#endif
>> >>>>> +#if HAVE_UNISTD_H
>> >>>>> +#include <unistd.h>
>> >>>>> +#endif
>> >>>>> +#include "os_support.h"
>> >>>>> +#include "url.h"
>> >>>>> +#include <stdlib.h>
>> >>>>> +#include <sys/stat.h>
>> >>>>> +
>> >>>>> +typedef struct IPFSGatewayContext {
>> >>>>> +    AVClass *class;
>> >>>>> +    URLContext *inner;
>> >>>>> +    // Is filled by the -gateway argument and not changed after.
>> >>>>> +    char *gateway;
>> >>>>> +    // If the above gateway is non null, it will be copied into
>> this
>> >>>> buffer.
>> >>>>> +    // Else this buffer will contain the auto detected gateway.
>> >>>>> +    // In either case, the gateway to use will be in this buffer.
>> >>>>> +    char gateway_buffer[PATH_MAX];
>> >>>>> +} IPFSGatewayContext;
>> >>>>> +
>> >>>>> +// A best-effort way to find the IPFS gateway.
>> >>>>> +// Only the most appropiate gateway is set. It's not actually
>> >> requested
>> >>>>> +// (http call) to prevent a potential slowdown in startup. A
>> potential
>> >>>> timeout
>> >>>>> +// is handled by the HTTP protocol.
>> >>>>> +static int populate_ipfs_gateway(URLContext *h)
>> >>>>> +{
>> >>>>> +    IPFSGatewayContext *c = h->priv_data;
>> >>>>> +    char ipfs_full_data_folder[PATH_MAX];
>> >>>>> +    char ipfs_gateway_file[PATH_MAX];
>> >>>>> +    struct stat st;
>> >>>>> +    int stat_ret = 0;
>> >>>>> +    int ret = AVERROR(EINVAL);
>> >>>>> +    FILE *gateway_file = NULL;
>> >>>>> +
>> >>>>> +    // Test $IPFS_GATEWAY.
>> >>>>> +    if (getenv("IPFS_GATEWAY") != NULL) {
>> >>>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
>> >> "%s",
>> >>>>> +                     getenv("IPFS_GATEWAY")) >=
>> >>>> sizeof(c->gateway_buffer)) {
>> >>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment
>> >>>> variable exceeds the maximum length. We allow a max of %zu
>> >> characters\n",
>> >>>> sizeof(c->gateway_buffer));
>> >>>>> +            ret = AVERROR(EINVAL);
>> >>>>> +            goto err;
>> >>>>> +        }
>> >>>>> +
>> >>>>> +        ret = 1;
>> >>>>> +        goto err;
>> >>>>> +    } else
>> >>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
>> >>>>> +
>> >>>>> +    // We need to know the IPFS folder to - eventually - read the
>> >>>> contents of
>> >>>>> +    // the "gateway" file which would tell us the gateway to use.
>> >>>>> +    if (getenv("IPFS_PATH") == NULL) {
>> >>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
>> >>>>> +
>> >>>>> +        // Try via the home folder.
>> >>>>> +        if (getenv("HOME") == NULL) {
>> >>>>> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be
>> empty.\n");
>> >>>>> +            ret = AVERROR(EINVAL);
>> >>>>> +            goto err;
>> >>>>> +        }
>> >>>>> +
>> >>>>> +        // Verify the composed path fits.
>> >>>>> +        if (snprintf(ipfs_full_data_folder,
>> >>>> sizeof(ipfs_full_data_folder),
>> >>>>> +                     "%s/.ipfs/", getenv("HOME")) >=
>> >>>> sizeof(ipfs_full_data_folder)) {
>> >>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds
>> the
>> >>>> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
>> >>>>> +            ret = AVERROR(EINVAL);
>> >>>>> +            goto err;
>> >>>>> +        }
>> >>>>> +
>> >>>>> +        // Stat the folder.
>> >>>>> +        // It should exist in a default IPFS setup when run as
>> local
>> >>>> user.
>> >>>>> +#ifndef _WIN32
>> >>>>> +        stat_ret = stat(ipfs_full_data_folder, &st);
>> >>>>> +#else
>> >>>>> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
>> >>>>> +#endif
>> >>>>> +        if (stat_ret < 0) {
>> >>>>> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
>> >>>> tried:\n");
>> >>>>> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was
>> >> empty.\n");
>> >>>>> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s)
>> which
>> >>>> doesn't exist.\n", ipfs_full_data_folder);
>> >>>>> +            ret = AVERROR(ENOENT);
>> >>>>> +            goto err;
>> >>>>> +        }
>> >>>>> +    } else {
>> >>>>> +        if (snprintf(ipfs_full_data_folder,
>> >>>> sizeof(ipfs_full_data_folder), "%s",
>> >>>>> +                 getenv("IPFS_PATH")) >=
>> >> sizeof(ipfs_full_data_folder))
>> >>>> {
>> >>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
>> >>>> variable exceeds the maximum length. We allow a max of %zu
>> >> characters\n",
>> >>>> sizeof(c->gateway_buffer));
>> >>>>> +            ret = AVERROR(EINVAL);
>> >>>>> +            goto err;
>> >>>>> +        }
>> >>>>> +
>> >>>>> +    }
>> >>>>> +
>> >>>>> +    // Copy the fully composed gateway path into ipfs_gateway_file.
>> >>>>> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
>> >>>> "%sgateway",
>> >>>>> +                 ipfs_full_data_folder) >=
>> sizeof(ipfs_gateway_file))
>> >> {
>> >>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path
>> exceeds
>> >>>> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
>> >>>>> +        ret = AVERROR(ENOENT);
>> >>>>> +        goto err;
>> >>>>> +    }
>> >>>>> +
>> >>>>> +    // Get the contents of the gateway file.
>> >>>>> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
>> >>>>> +    if (!gateway_file) {
>> >>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
>> >> %s)
>> >>>> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
>> >>>>> +        ret = AVERROR(ENOENT);
>> >>>>> +        goto err;
>> >>>>> +    }
>> >>>>> +
>> >>>>> +    // Read a single line (fgets stops at new line mark).
>> >>>>> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
>> >>>> gateway_file);
>> >>>>> +
>> >>>>> +    // Replace the last char with \0
>> >>>>> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
>>
>> Btw: fgets normally zero-terminates the buffer (i.e. it reads at most
>> (sizeof(c->gateway_buffer) - 1) - 1 characters from the stream and adds
>> a \0. But on a read error it doesn't do this, instead the contents of
>> the buffer are indeterminate and so should not be used. Adding a zero at
>> the very end of gateway_buffer does not help to enforce this.
>
>
>> fgets returns NULL on read error and if nothing could be read due to
>> immediate EOF; so you should check for that.
>> (Btw: A protocol's context is zeroed initially, so
>> c->gateway_buffer[sizeof(c->gateway_buffer) - 1] is already zero,
>> because no one ever wrote anything else there.)
>>
>
> Nice one! Nobody caught that before.
> So checking on NULL for fgets and removing the line to add a 0 at the end
> is the appropriate fix here I assume.
>
> Changed to what I assumed. Please correct me if I'm wrong.
>
>
>>
>> >>>>> +
>> >>>>> +    // Replace first occurence of end of line with \0
>> >>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
>> >>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
>> >>>>
>> >>>> If the buffer contains both \r and \n and the first \n precedes the
>> >>>> first \r, then the above zeroes both the first \r and the first \n.
>> If
>> >>>> it is enough to only zero the first newline, then this can be
>> simplified
>> >>>> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
>> >>>>
>> >>>
>> >>> It used to be in your suggested simplified approach.
>> >>> It was then suggested to split it because of new line differences
>> >>> on different platforms. This was supposed to catch "everything".
>> >>>
>> >>> I prefer to keep this as-is now.
>> >>>
>> >>
>> >> Is it intended to use what is between the first \n and the first \r
>> >> lateron although this data will be after the '\0' after this? That
>> would
>> >> be very weird.
>> >>
>> >
>> > You have to elaborate on that as that sounds really wrong to me.
>> >
>>
>> c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;
>> overwrites the first \r or \n (whichever is first) with '\0' (if any;
>> otherwise it overwrites the terminating \0 with \0.
>>
>> c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
>> c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
>> also does this, but in case that the string originally contained both a
>> \r and a \n with the first \n preceding the first \r it would also zero
>> the first \r; this position is after the first terminating \0 (those at
>> the position where the first \n was), so it appeared to me that you
>> intended to use what is after the first \0. Because that is the only
>> difference between the approach with two calls to strcspn and the one
>> with one call to strcspn.
>>
>> But then I read that this has been suggested to you and found the mail
>> (https://ffmpeg.org/pipermail/ffmpeg-devel/2022-February/292590.html)
>> this referred to. There seems to be a misunderstanding in the semantics
>> of strcspn (most likely a confusion with strstr):
>> strcspn(c->gateway_buffer, "\r\n") works with single \r and single \n,
>> there need not be a single "\r\n" substring. Both methods would produce
>> strings that compare equal according to strcmp().
>>
>>
> Thank you for your elaborate explanation here!
> But now I'm not sure what the resolution - if any - should be?
>
>
>> > If I'm correct a file doesn't start with either \r or \n. Unless you
>> have
>> > intentional line breaks.
>> > It ends with those, not starts. I'm just going to ignore \r for now for
>> > brevity.
>> >
>> > So if you have:
>> > \n<your gateway>\n
>> >
>> > Would be translated to:
>> > \0<your gateway>\n
>> >
>> > Then indeed you won't get a gateway because it's intended that your
>> first
>> > line is that gateway.
>> >
>> > It must be in this format:
>> > <your gateway>\n
>> >
>> > or without a newline
>> > <your gateway>
>> >
>> > There is no blank line trimming to fix user screwups.
>> > Users are not intended to touch that file anyhow. It's auto-generated by
>> > IPFS and removed once it shuts down.
>> >
>> >
>> >>>
>> >>>>> +
>> >>>>> +    // If strlen finds anything longer then 0 characters then we
>> have
>> >> a
>> >>>>> +    // potential gateway url.
>> >>>>> +    if (strlen(c->gateway_buffer) < 1) {
>> >>>>
>> >>>> if (*c->gateway_buffer == '\0')
>> >>>>
>> >>>
>> >>> Is that a style difference or an actual behavior difference?
>> >>>
>> >>
>> >> The former would call strlen to check whether a string is empty (unless
>> >> the compiler optimizes it away). No actual behaviour difference exists.
>> >> But it is nevertheless more than a style difference.
>> >>
>> >
>> > Fixed locally.
>> >
>> >>
>> >>>
>> >>>>
>> >>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
>> >> %s)
>> >>>> appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
>> >>>>> +        ret = AVERROR(EILSEQ);
>> >>>>> +        goto err;
>> >>>>> +    } else {
>> >>>>> +        // We're done, the c->gateway_buffer has something that
>> looks
>> >>>> valid.
>> >>>>> +        ret = 1;
>> >>>>> +        goto err;
>> >>>>> +    }
>> >>>>> +
>> >>>>> +err:
>> >>>>> +    if (gateway_file)
>> >>>>> +        fclose(gateway_file);
>> >>>>> +
>> >>>>> +    return ret;
>> >>>>> +}
>> >>>>> +
>> >>>>> +static int translate_ipfs_to_http(URLContext *h, const char *uri,
>> >>>>> +                                  int flags, AVDictionary
>> **options)
>> >>>>> +{
>> >>>>> +    const char *ipfs_cid;
>> >>>>> +    char *fulluri = NULL;
>> >>>>> +    int ret;
>> >>>>> +    IPFSGatewayContext *c = h->priv_data;
>> >>>>> +
>> >>>>> +    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is
>> >>>> stripped from
>> >>>>> +    // the string leaving just the CID in ipfs_cid.
>> >>>>> +    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
>> >>>>> +    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
>> >>>>> +
>> >>>>> +    // We must have either ipns or ipfs.
>> >>>>> +    if (!is_ipfs && !is_ipns) {
>> >>>>> +        ret = AVERROR(EINVAL);
>> >>>>> +        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
>> >>>>> +        goto err;
>> >>>>> +    }
>> >>>>> +
>> >>>>> +    // If the CID has a length greater then 0 then we assume we
>> have a
>> >>>> proper working one.
>> >>>>> +    // It could still be wrong but in that case the gateway should
>> >> save
>> >>>> us and
>> >>>>> +    // ruturn a 403 error. The http protocol handles this.
>> >>>>> +    if (strlen(ipfs_cid) < 1) {
>> >>>>> +        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
>> >>>>> +        ret = AVERROR(EILSEQ);
>> >>>>> +        goto err;
>> >>>>> +    }
>> >>>>> +
>> >>>>> +    // Populate c->gateway_buffer with whatever is in c->gateway
>> >>>>> +    if (c->gateway != NULL) {
>> >>>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
>> >> "%s",
>> >>>>> +                     c->gateway) >= sizeof(c->gateway_buffer)) {
>> >>>>> +            av_log(h, AV_LOG_WARNING, "The -gateway parameter is
>> too
>> >>>> long. We allow a max of %zu characters\n",
>> sizeof(c->gateway_buffer));
>> >>>>
>> >>>> We typically use SIZE_SPECIFIER instead of z for compatibility with
>> >>>> ancient versions of MSVC.
>> >>>> (I don't know whether there is any supported version of MSVC that
>> >>>> doesn't support z; I don't use MSVC myself.)
>> >>>>
>> >>>
>> >>> Here too I was explicitly suggested to use %zu (when I was using - I
>> >> think
>> >>> - %lu before).
>> >>> So I assume that the ancient MSVC version you're referring to is
>> probably
>> >>> not supported anymore from an ffmpeg compiler requirement point of
>> view?
>> >>>
>> >>> Should i now change all "%zu" to "%"SIZE_SPECIFIER (this does not
>> make it
>> >>> neater nor shorter with the 80 char line limit).
>> >>> Is this change required?
>> >>
>> >> As said: It is for compatibility with ancient versions of MSVC. But I
>> >> don't know whether any of the actually supported versions of MSVC still
>> >> need it.
>> >>
>> >>>
>> >>> If it is, what _exactly_ do i need to change it in? I see a
>> >>> couple different SIZE_SPECIFIER prefixes. I have no clue what to use
>> >> here.
>> >>
>> >> There is only one SIZE_SPECIFIER, namely SIZE_SPECIFIER; the
>> >> PTRDIFF_SPECIFIER (or whatever you see) is obviously not the thing to
>> >> use for size_t.
>> >>
>> >> I see some:
>> > %"SIZE_SPECIFIER"
>> >
>> > But also like this:
>> > %8"SIZE_SPECIFIER
>> > %5"SIZE_SPECIFIER
>> >
>> > I'm assuming they are for clipping?
>>
>> printf specifiers consist of flags, minimum field width, precision
>> (indicated by '.'), length modifier and conversion specifier (in that
>> order). Like the macros in inttypes.h SIZE_SPECIFIER expands to a
>> character string literal that includes a length modifier and a
>> conversion specifier, so that one can use this together with the former
>> three possibilities.
>> Of course, it would make no sense to use any of these three here, so it
>> would just be %"SIZE_SPECIFIER".
>>
>> >
>> > I really don't like having to change it to this as it:
>> > - Uglifies the code (and i doubt if it's even needed)
>>
>> I agree to both points. Which is why I am not forcing you to do this. It
>> seems that there is already at least one commonly compiled instance
>> where it is not used at all (libavformat/argo_cvg.c).
>>
>
> So i leave it as "%zu". Check.
> I did also grep on the entire ffmpeg source for this. Both seems to be
> used in various places though granted, SIZE_SPECIFIER is used more.
>
>>
>> > - Directly contradicts earlier feedback
>>
>> I don't agree with that given that SIZE_SPECIFIER is basically just a
>> wrapper around zu.
>>
>
> My mistake, it doesn't contradict it. Sorry for that.
>
>>
>> > - And due to the above gives me a feeling of wasting time
>> > - Fine to change it but I hope there isn't another person popping up
>> with a
>> > question to change it yet again....
>> >
>> >
>> >>>
>> >>>
>> >>>>> +            ret = AVERROR(EINVAL);
>> >>>>> +            goto err;
>> >>>>> +        }
>> >>>>> +    } else {
>> >>>>> +        // Populate the IPFS gateway if we have any.
>> >>>>> +        // If not, inform the user how to properly set one.
>> >>>>> +        ret = populate_ipfs_gateway(h);
>> >>>>> +
>> >>>>> +        if (ret < 1) {
>> >>>>> +            // We fallback on dweb.link (managed by Protocol Labs).
>> >>>>> +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
>> "
>> >>>> https://dweb.link");
>> >>>>> +
>> >>>>> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
>> >>>> running. You’re now using the public gateway at dweb.link.\n");
>> >>>>> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
>> >>>> recommended to improve performance and reliability, and not share all
>> >> your
>> >>>> activity with a single IPFS gateway.\n");
>> >>>>> +            av_log(h, AV_LOG_INFO, "There are multiple options to
>> >>>> define this gateway.\n");
>> >>>>> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
>> >>>> param, without a trailing slash: -gateway <url>.\n");
>> >>>>> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
>> >>>> environment variable with the full HTTP URL to the gateway without
>> >> trailing
>> >>>> forward slash.\n");
>> >>>>> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH
>> >> environment
>> >>>> variable and point it to the IPFS data path - this is typically
>> >> ~/.ipfs\n");
>> >>>>
>> >>>> All those AV_LOG_INFO can be combined which has the advantage that
>> the
>> >>>> logs can't be teared apart (which they can now if something else
>> logs at
>> >>>> the same time); furthermore, this would also decrease codesize.
>> >>>>
>> >>>
>> >>> Do you have an example of where that's happening?
>> >>>
>> >>
>> >> It can happen any time you have multiple threads using av_log at the
>> >> same time. Given that your statements are supposed to be full lines, it
>> >> is not that bad if it happens here, but it is nevertheless suboptimal.
>> >>
>> >
>> > I meant a multi-line av_log example ;) But yeah, I get how threading
>> could
>> > screw this up.
>> >
>>
>> I meant something like:
>> av_log(h, AV_LOG_INFO, "Installing IPFS locally is recommended to
>> improve performance and reliability, and not share all your activity
>> with a single IPFS gateway.\n"
>>        "There are multiple options to define this gateway.\n"
>>        "1. Call ffmpeg with a gateway param, without a trailing slash:
>> -gateway <url>.\n"
>>        "2. Define an $IPFS_GATEWAY environment variable with the full
>> HTTP URL to the gateway without trailing forward slash.\n"
>>        "3. Define an $IPFS_PATH environment variable and point it to the
>> IPFS data path - this is typically ~/.ipfs\n");
>>
>> (These lines are actually way too long.)
>>
>
> Fixed as multiline.
> I do like to keep the content of the lines though.
> I also checked this with IPFS folks to be as correct and clear as we could
> make it.
>
>>
>> - Andreas
>>
>
To amend and probably speed things up.
The diff with your feedback applied now looks like this:
https://p.sc2.nl/HfjJt
Andreas Rheinhardt March 31, 2022, 11:01 p.m. UTC | #9
Mark Gaiser:
> On Thu, Mar 31, 2022 at 11:44 PM Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com> wrote:
> 
>> Mark Gaiser:
>>> On Wed, Mar 30, 2022 at 3:57 PM Andreas Rheinhardt <
>>> andreas.rheinhardt@outlook.com> wrote:
>>>
>>>> Mark Gaiser:
>>>>> On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
>>>>> andreas.rheinhardt@outlook.com> wrote:
>>>>>
>>>>>> Mark Gaiser:
>>>>>>> This patch adds support for:
>>>>>>> - ffplay ipfs://<cid>
>>>>>>> - ffplay ipns://<cid>
>>>>>>>
>>>>>>> IPFS data can be played from so called "ipfs gateways".
>>>>>>> A gateway is essentially a webserver that gives access to the
>>>>>>> distributed IPFS network.
>>>>>>>
>>>>>>> This protocol support (ipfs and ipns) therefore translates
>>>>>>> ipfs:// and ipns:// to a http:// url. This resulting url is
>>>>>>> then handled by the http protocol. It could also be https
>>>>>>> depending on the gateway provided.
>>>>>>>
>>>>>>> To use this protocol, a gateway must be provided.
>>>>>>> If you do nothing it will try to find it in your
>>>>>>> $HOME/.ipfs/gateway file. The ways to set it manually are:
>>>>>>> 1. Define a -gateway <url> to the gateway.
>>>>>>> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
>>>>>>> 3. Define $IPFS_PATH and point it to the IPFS data path.
>>>>>>> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
>>>>>>>
>>>>>>> Signed-off-by: Mark Gaiser <markg85@gmail.com>
>>>>>>> ---
>>>>>>>  configure                 |   2 +
>>>>>>>  doc/protocols.texi        |  30 ++++
>>>>>>>  libavformat/Makefile      |   2 +
>>>>>>>  libavformat/ipfsgateway.c | 309
>> ++++++++++++++++++++++++++++++++++++++
>>>>>>>  libavformat/protocols.c   |   2 +
>>>>>>>  5 files changed, 345 insertions(+)
>>>>>>>  create mode 100644 libavformat/ipfsgateway.c
>>>>>>>
>>>>>>> diff --git a/configure b/configure
>>>>>>> index e4d36aa639..55af90957a 100755
>>>>>>> --- a/configure
>>>>>>> +++ b/configure
>>>>>>> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
>>>>>>>  udplite_protocol_select="network"
>>>>>>>  unix_protocol_deps="sys_un_h"
>>>>>>>  unix_protocol_select="network"
>>>>>>> +ipfs_protocol_select="https_protocol"
>>>>>>> +ipns_protocol_select="https_protocol"
>>>>>>>
>>>>>>>  # external library protocols
>>>>>>>  libamqp_protocol_deps="librabbitmq"
>>>>>>> diff --git a/doc/protocols.texi b/doc/protocols.texi
>>>>>>> index d207df0b52..7c9c0a4808 100644
>>>>>>> --- a/doc/protocols.texi
>>>>>>> +++ b/doc/protocols.texi
>>>>>>> @@ -2025,5 +2025,35 @@ decoding errors.
>>>>>>>
>>>>>>>  @end table
>>>>>>>
>>>>>>> +@section ipfs
>>>>>>> +
>>>>>>> +InterPlanetary File System (IPFS) protocol support. One can access
>>>>>> files stored
>>>>>>> +on the IPFS network through so called gateways. Those are http(s)
>>>>>> endpoints.
>>>>>>> +This protocol wraps the IPFS native protocols (ipfs:// and ipns://)
>> to
>>>>>> be send
>>>>>>> +to such a gateway. Users can (and should) host their own node which
>>>>>> means this
>>>>>>> +protocol will use your local machine gateway to access files on the
>>>>>> IPFS network.
>>>>>>> +
>>>>>>> +If a user doesn't have a node of their own then the public gateway
>>>>>> dweb.link is
>>>>>>> +used by default.
>>>>>>> +
>>>>>>> +You can use this protocol in 2 ways. Using IPFS:
>>>>>>> +@example
>>>>>>> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>>>>>>> +@end example
>>>>>>> +
>>>>>>> +Or the IPNS protocol (IPNS is mutable IPFS):
>>>>>>> +@example
>>>>>>> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
>>>>>>> +@end example
>>>>>>> +
>>>>>>> +You can also change the gateway to be used:
>>>>>>> +
>>>>>>> +@table @option
>>>>>>> +
>>>>>>> +@item gateway
>>>>>>> +Defines the gateway to use. When nothing is provided the protocol
>> will
>>>>>> first try
>>>>>>> +your local gateway. If that fails dweb.link will be used.
>>>>>>> +
>>>>>>> +@end table
>>>>>>>
>>>>>>>  @c man end PROTOCOLS
>>>>>>> diff --git a/libavformat/Makefile b/libavformat/Makefile
>>>>>>> index d7182d6bd8..e3233fd7ac 100644
>>>>>>> --- a/libavformat/Makefile
>>>>>>> +++ b/libavformat/Makefile
>>>>>>> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
>>>>>> srtpproto.o srtp.o
>>>>>>>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
>>>>>>>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
>>>>>>>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
>>>>>>> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
>>>>>>> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
>>>>>>>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
>>>>>>>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
>>>>>>>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
>>>>>>> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
>>>>>>> new file mode 100644
>>>>>>> index 0000000000..1a039589c0
>>>>>>> --- /dev/null
>>>>>>> +++ b/libavformat/ipfsgateway.c
>>>>>>> @@ -0,0 +1,309 @@
>>>>>>> +/*
>>>>>>> + * IPFS and IPNS protocol support through IPFS Gateway.
>>>>>>> + * Copyright (c) 2022 Mark Gaiser
>>>>>>> + *
>>>>>>> + * This file is part of FFmpeg.
>>>>>>> + *
>>>>>>> + * FFmpeg is free software; you can redistribute it and/or
>>>>>>> + * modify it under the terms of the GNU Lesser General Public
>>>>>>> + * License as published by the Free Software Foundation; either
>>>>>>> + * version 2.1 of the License, or (at your option) any later
>> version.
>>>>>>> + *
>>>>>>> + * FFmpeg is distributed in the hope that it will be useful,
>>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>>>>> + * Lesser General Public License for more details.
>>>>>>> + *
>>>>>>> + * You should have received a copy of the GNU Lesser General Public
>>>>>>> + * License along with FFmpeg; if not, write to the Free Software
>>>>>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>>>>>> 02110-1301 USA
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include "avformat.h"
>>>>>>> +#include "libavutil/avassert.h"
>>>>>>
>>>>>> Unused.
>>>>>>
>>>>>>> +#include "libavutil/avstring.h"
>>>>>>> +#include "libavutil/internal.h"
>>>>>>> +#include "libavutil/opt.h"
>>>>>>> +#include "libavutil/tree.h"
>>>>>>
>>>>>> ?
>>>>>>
>>>>>
>>>>> This whole include part can be cleaned up much more.
>>>>> Just having these:
>>>>> #include "libavutil/avstring.h"
>>>>> #include "libavutil/opt.h"
>>>>> #include "url.h"
>>>>> #include <sys/stat.h>
>>>>>
>>>>> is enough.
>>>>>
>>>>>
>>>>>>> +#include <fcntl.h>
>>>>>>> +#if HAVE_IO_H
>>>>>>> +#include <io.h>
>>>>>>> +#endif
>>>>>>> +#if HAVE_UNISTD_H
>>>>>>> +#include <unistd.h>
>>>>>>> +#endif
>>>>>>> +#include "os_support.h"
>>>>>>> +#include "url.h"
>>>>>>> +#include <stdlib.h>
>>>>>>> +#include <sys/stat.h>
>>>>>>> +
>>>>>>> +typedef struct IPFSGatewayContext {
>>>>>>> +    AVClass *class;
>>>>>>> +    URLContext *inner;
>>>>>>> +    // Is filled by the -gateway argument and not changed after.
>>>>>>> +    char *gateway;
>>>>>>> +    // If the above gateway is non null, it will be copied into this
>>>>>> buffer.
>>>>>>> +    // Else this buffer will contain the auto detected gateway.
>>>>>>> +    // In either case, the gateway to use will be in this buffer.
>>>>>>> +    char gateway_buffer[PATH_MAX];
>>>>>>> +} IPFSGatewayContext;
>>>>>>> +
>>>>>>> +// A best-effort way to find the IPFS gateway.
>>>>>>> +// Only the most appropiate gateway is set. It's not actually
>>>> requested
>>>>>>> +// (http call) to prevent a potential slowdown in startup. A
>> potential
>>>>>> timeout
>>>>>>> +// is handled by the HTTP protocol.
>>>>>>> +static int populate_ipfs_gateway(URLContext *h)
>>>>>>> +{
>>>>>>> +    IPFSGatewayContext *c = h->priv_data;
>>>>>>> +    char ipfs_full_data_folder[PATH_MAX];
>>>>>>> +    char ipfs_gateway_file[PATH_MAX];
>>>>>>> +    struct stat st;
>>>>>>> +    int stat_ret = 0;
>>>>>>> +    int ret = AVERROR(EINVAL);
>>>>>>> +    FILE *gateway_file = NULL;
>>>>>>> +
>>>>>>> +    // Test $IPFS_GATEWAY.
>>>>>>> +    if (getenv("IPFS_GATEWAY") != NULL) {
>>>>>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
>>>> "%s",
>>>>>>> +                     getenv("IPFS_GATEWAY")) >=
>>>>>> sizeof(c->gateway_buffer)) {
>>>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment
>>>>>> variable exceeds the maximum length. We allow a max of %zu
>>>> characters\n",
>>>>>> sizeof(c->gateway_buffer));
>>>>>>> +            ret = AVERROR(EINVAL);
>>>>>>> +            goto err;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        ret = 1;
>>>>>>> +        goto err;
>>>>>>> +    } else
>>>>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
>>>>>>> +
>>>>>>> +    // We need to know the IPFS folder to - eventually - read the
>>>>>> contents of
>>>>>>> +    // the "gateway" file which would tell us the gateway to use.
>>>>>>> +    if (getenv("IPFS_PATH") == NULL) {
>>>>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
>>>>>>> +
>>>>>>> +        // Try via the home folder.
>>>>>>> +        if (getenv("HOME") == NULL) {
>>>>>>> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be
>> empty.\n");
>>>>>>> +            ret = AVERROR(EINVAL);
>>>>>>> +            goto err;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        // Verify the composed path fits.
>>>>>>> +        if (snprintf(ipfs_full_data_folder,
>>>>>> sizeof(ipfs_full_data_folder),
>>>>>>> +                     "%s/.ipfs/", getenv("HOME")) >=
>>>>>> sizeof(ipfs_full_data_folder)) {
>>>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds
>> the
>>>>>> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
>>>>>>> +            ret = AVERROR(EINVAL);
>>>>>>> +            goto err;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        // Stat the folder.
>>>>>>> +        // It should exist in a default IPFS setup when run as local
>>>>>> user.
>>>>>>> +#ifndef _WIN32
>>>>>>> +        stat_ret = stat(ipfs_full_data_folder, &st);
>>>>>>> +#else
>>>>>>> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
>>>>>>> +#endif
>>>>>>> +        if (stat_ret < 0) {
>>>>>>> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
>>>>>> tried:\n");
>>>>>>> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was
>>>> empty.\n");
>>>>>>> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s)
>> which
>>>>>> doesn't exist.\n", ipfs_full_data_folder);
>>>>>>> +            ret = AVERROR(ENOENT);
>>>>>>> +            goto err;
>>>>>>> +        }
>>>>>>> +    } else {
>>>>>>> +        if (snprintf(ipfs_full_data_folder,
>>>>>> sizeof(ipfs_full_data_folder), "%s",
>>>>>>> +                 getenv("IPFS_PATH")) >=
>>>> sizeof(ipfs_full_data_folder))
>>>>>> {
>>>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
>>>>>> variable exceeds the maximum length. We allow a max of %zu
>>>> characters\n",
>>>>>> sizeof(c->gateway_buffer));
>>>>>>> +            ret = AVERROR(EINVAL);
>>>>>>> +            goto err;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    // Copy the fully composed gateway path into ipfs_gateway_file.
>>>>>>> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
>>>>>> "%sgateway",
>>>>>>> +                 ipfs_full_data_folder) >=
>> sizeof(ipfs_gateway_file))
>>>> {
>>>>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path
>> exceeds
>>>>>> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
>>>>>>> +        ret = AVERROR(ENOENT);
>>>>>>> +        goto err;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    // Get the contents of the gateway file.
>>>>>>> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
>>>>>>> +    if (!gateway_file) {
>>>>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri:
>>>> %s)
>>>>>> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
>>>>>>> +        ret = AVERROR(ENOENT);
>>>>>>> +        goto err;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    // Read a single line (fgets stops at new line mark).
>>>>>>> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
>>>>>> gateway_file);
>>>>>>> +
>>>>>>> +    // Replace the last char with \0
>>>>>>> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
>>
>> Btw: fgets normally zero-terminates the buffer (i.e. it reads at most
>> (sizeof(c->gateway_buffer) - 1) - 1 characters from the stream and adds
>> a \0. But on a read error it doesn't do this, instead the contents of
>> the buffer are indeterminate and so should not be used. Adding a zero at
>> the very end of gateway_buffer does not help to enforce this.
> 
> 
>> fgets returns NULL on read error and if nothing could be read due to
>> immediate EOF; so you should check for that.
>> (Btw: A protocol's context is zeroed initially, so
>> c->gateway_buffer[sizeof(c->gateway_buffer) - 1] is already zero,
>> because no one ever wrote anything else there.)
>>
> 
> Nice one! Nobody caught that before.
> So checking on NULL for fgets and removing the line to add a 0 at the end
> is the appropriate fix here I assume.
> 

Yes.

> Changed to what I assumed. Please correct me if I'm wrong.
> 
> 
>>
>>>>>>> +
>>>>>>> +    // Replace first occurence of end of line with \0
>>>>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
>>>>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
>>>>>>
>>>>>> If the buffer contains both \r and \n and the first \n precedes the
>>>>>> first \r, then the above zeroes both the first \r and the first \n. If
>>>>>> it is enough to only zero the first newline, then this can be
>> simplified
>>>>>> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
>>>>>>
>>>>>
>>>>> It used to be in your suggested simplified approach.
>>>>> It was then suggested to split it because of new line differences
>>>>> on different platforms. This was supposed to catch "everything".
>>>>>
>>>>> I prefer to keep this as-is now.
>>>>>
>>>>
>>>> Is it intended to use what is between the first \n and the first \r
>>>> lateron although this data will be after the '\0' after this? That would
>>>> be very weird.
>>>>
>>>
>>> You have to elaborate on that as that sounds really wrong to me.
>>>
>>
>> c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;
>> overwrites the first \r or \n (whichever is first) with '\0' (if any;
>> otherwise it overwrites the terminating \0 with \0.
>>
>> c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
>> c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
>> also does this, but in case that the string originally contained both a
>> \r and a \n with the first \n preceding the first \r it would also zero
>> the first \r; this position is after the first terminating \0 (those at
>> the position where the first \n was), so it appeared to me that you
>> intended to use what is after the first \0. Because that is the only
>> difference between the approach with two calls to strcspn and the one
>> with one call to strcspn.
>>
>> But then I read that this has been suggested to you and found the mail
>> (https://ffmpeg.org/pipermail/ffmpeg-devel/2022-February/292590.html)
>> this referred to. There seems to be a misunderstanding in the semantics
>> of strcspn (most likely a confusion with strstr):
>> strcspn(c->gateway_buffer, "\r\n") works with single \r and single \n,
>> there need not be a single "\r\n" substring. Both methods would produce
>> strings that compare equal according to strcmp().
>>
>>
> Thank you for your elaborate explanation here!
> But now I'm not sure what the resolution - if any - should be?
> 

Just use strcspn(c->gateway_buffer, "\r\n"). It works.

> 
> So i leave it as "%zu". Check.
> I did also grep on the entire ffmpeg source for this. Both seems to be used
> in various places though granted, SIZE_SPECIFIER is used more.
> 

Btw: %zu usage in the vaapi files is fine given that this API is not
available on Windows so that limitations of Windows' libc are
irrelevant. If we discount these, usage becomes very loopsided.

>>
>>> - Directly contradicts earlier feedback
>>
>> I don't agree with that given that SIZE_SPECIFIER is basically just a
>> wrapper around zu.
>>
> 
> My mistake, it doesn't contradict it. Sorry for that.
> 
>>
>>> - And due to the above gives me a feeling of wasting time
>>> - Fine to change it but I hope there isn't another person popping up
>> with a
>>> question to change it yet again....
>>>
>>>
>>>>>
>>>>>
>>>>>>> +            ret = AVERROR(EINVAL);
>>>>>>> +            goto err;
>>>>>>> +        }
>>>>>>> +    } else {
>>>>>>> +        // Populate the IPFS gateway if we have any.
>>>>>>> +        // If not, inform the user how to properly set one.
>>>>>>> +        ret = populate_ipfs_gateway(h);
>>>>>>> +
>>>>>>> +        if (ret < 1) {
>>>>>>> +            // We fallback on dweb.link (managed by Protocol Labs).
>>>>>>> +            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "
>>>>>> https://dweb.link");
>>>>>>> +
>>>>>>> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
>>>>>> running. You’re now using the public gateway at dweb.link.\n");
>>>>>>> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
>>>>>> recommended to improve performance and reliability, and not share all
>>>> your
>>>>>> activity with a single IPFS gateway.\n");
>>>>>>> +            av_log(h, AV_LOG_INFO, "There are multiple options to
>>>>>> define this gateway.\n");
>>>>>>> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
>>>>>> param, without a trailing slash: -gateway <url>.\n");
>>>>>>> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
>>>>>> environment variable with the full HTTP URL to the gateway without
>>>> trailing
>>>>>> forward slash.\n");
>>>>>>> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH
>>>> environment
>>>>>> variable and point it to the IPFS data path - this is typically
>>>> ~/.ipfs\n");
>>>>>>
>>>>>> All those AV_LOG_INFO can be combined which has the advantage that the
>>>>>> logs can't be teared apart (which they can now if something else logs
>> at
>>>>>> the same time); furthermore, this would also decrease codesize.
>>>>>>
>>>>>
>>>>> Do you have an example of where that's happening?
>>>>>
>>>>
>>>> It can happen any time you have multiple threads using av_log at the
>>>> same time. Given that your statements are supposed to be full lines, it
>>>> is not that bad if it happens here, but it is nevertheless suboptimal.
>>>>
>>>
>>> I meant a multi-line av_log example ;) But yeah, I get how threading
>> could
>>> screw this up.
>>>
>>
>> I meant something like:
>> av_log(h, AV_LOG_INFO, "Installing IPFS locally is recommended to
>> improve performance and reliability, and not share all your activity
>> with a single IPFS gateway.\n"
>>        "There are multiple options to define this gateway.\n"
>>        "1. Call ffmpeg with a gateway param, without a trailing slash:
>> -gateway <url>.\n"
>>        "2. Define an $IPFS_GATEWAY environment variable with the full
>> HTTP URL to the gateway without trailing forward slash.\n"
>>        "3. Define an $IPFS_PATH environment variable and point it to the
>> IPFS data path - this is typically ~/.ipfs\n");
>>
>> (These lines are actually way too long.)
>>
> 
> Fixed as multiline.
> I do like to keep the content of the lines though.

I did not say that you need to change the actual content of the lines;
you just should split it into more lines in the source code (no need to
add any more newlines in the actual string!) in order to more adhere to
the (soft) 80 character line limit.

- Andreas
Mark Gaiser March 31, 2022, 11:56 p.m. UTC | #10
On Fri, Apr 1, 2022 at 1:01 AM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Mark Gaiser:
> > On Thu, Mar 31, 2022 at 11:44 PM Andreas Rheinhardt <
> > andreas.rheinhardt@outlook.com> wrote:
> >
> >> Mark Gaiser:
> >>> On Wed, Mar 30, 2022 at 3:57 PM Andreas Rheinhardt <
> >>> andreas.rheinhardt@outlook.com> wrote:
> >>>
> >>>> Mark Gaiser:
> >>>>> On Wed, Mar 30, 2022 at 2:21 PM Andreas Rheinhardt <
> >>>>> andreas.rheinhardt@outlook.com> wrote:
> >>>>>
> >>>>>> Mark Gaiser:
> >>>>>>> This patch adds support for:
> >>>>>>> - ffplay ipfs://<cid>
> >>>>>>> - ffplay ipns://<cid>
> >>>>>>>
> >>>>>>> IPFS data can be played from so called "ipfs gateways".
> >>>>>>> A gateway is essentially a webserver that gives access to the
> >>>>>>> distributed IPFS network.
> >>>>>>>
> >>>>>>> This protocol support (ipfs and ipns) therefore translates
> >>>>>>> ipfs:// and ipns:// to a http:// url. This resulting url is
> >>>>>>> then handled by the http protocol. It could also be https
> >>>>>>> depending on the gateway provided.
> >>>>>>>
> >>>>>>> To use this protocol, a gateway must be provided.
> >>>>>>> If you do nothing it will try to find it in your
> >>>>>>> $HOME/.ipfs/gateway file. The ways to set it manually are:
> >>>>>>> 1. Define a -gateway <url> to the gateway.
> >>>>>>> 2. Define $IPFS_GATEWAY with the full http link to the gateway.
> >>>>>>> 3. Define $IPFS_PATH and point it to the IPFS data path.
> >>>>>>> 4. Have IPFS running in your local user folder (under $HOME/.ipfs).
> >>>>>>>
> >>>>>>> Signed-off-by: Mark Gaiser <markg85@gmail.com>
> >>>>>>> ---
> >>>>>>>  configure                 |   2 +
> >>>>>>>  doc/protocols.texi        |  30 ++++
> >>>>>>>  libavformat/Makefile      |   2 +
> >>>>>>>  libavformat/ipfsgateway.c | 309
> >> ++++++++++++++++++++++++++++++++++++++
> >>>>>>>  libavformat/protocols.c   |   2 +
> >>>>>>>  5 files changed, 345 insertions(+)
> >>>>>>>  create mode 100644 libavformat/ipfsgateway.c
> >>>>>>>
> >>>>>>> diff --git a/configure b/configure
> >>>>>>> index e4d36aa639..55af90957a 100755
> >>>>>>> --- a/configure
> >>>>>>> +++ b/configure
> >>>>>>> @@ -3579,6 +3579,8 @@ udp_protocol_select="network"
> >>>>>>>  udplite_protocol_select="network"
> >>>>>>>  unix_protocol_deps="sys_un_h"
> >>>>>>>  unix_protocol_select="network"
> >>>>>>> +ipfs_protocol_select="https_protocol"
> >>>>>>> +ipns_protocol_select="https_protocol"
> >>>>>>>
> >>>>>>>  # external library protocols
> >>>>>>>  libamqp_protocol_deps="librabbitmq"
> >>>>>>> diff --git a/doc/protocols.texi b/doc/protocols.texi
> >>>>>>> index d207df0b52..7c9c0a4808 100644
> >>>>>>> --- a/doc/protocols.texi
> >>>>>>> +++ b/doc/protocols.texi
> >>>>>>> @@ -2025,5 +2025,35 @@ decoding errors.
> >>>>>>>
> >>>>>>>  @end table
> >>>>>>>
> >>>>>>> +@section ipfs
> >>>>>>> +
> >>>>>>> +InterPlanetary File System (IPFS) protocol support. One can access
> >>>>>> files stored
> >>>>>>> +on the IPFS network through so called gateways. Those are http(s)
> >>>>>> endpoints.
> >>>>>>> +This protocol wraps the IPFS native protocols (ipfs:// and
> ipns://)
> >> to
> >>>>>> be send
> >>>>>>> +to such a gateway. Users can (and should) host their own node
> which
> >>>>>> means this
> >>>>>>> +protocol will use your local machine gateway to access files on
> the
> >>>>>> IPFS network.
> >>>>>>> +
> >>>>>>> +If a user doesn't have a node of their own then the public gateway
> >>>>>> dweb.link is
> >>>>>>> +used by default.
> >>>>>>> +
> >>>>>>> +You can use this protocol in 2 ways. Using IPFS:
> >>>>>>> +@example
> >>>>>>> +ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> >>>>>>> +@end example
> >>>>>>> +
> >>>>>>> +Or the IPNS protocol (IPNS is mutable IPFS):
> >>>>>>> +@example
> >>>>>>> +ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
> >>>>>>> +@end example
> >>>>>>> +
> >>>>>>> +You can also change the gateway to be used:
> >>>>>>> +
> >>>>>>> +@table @option
> >>>>>>> +
> >>>>>>> +@item gateway
> >>>>>>> +Defines the gateway to use. When nothing is provided the protocol
> >> will
> >>>>>> first try
> >>>>>>> +your local gateway. If that fails dweb.link will be used.
> >>>>>>> +
> >>>>>>> +@end table
> >>>>>>>
> >>>>>>>  @c man end PROTOCOLS
> >>>>>>> diff --git a/libavformat/Makefile b/libavformat/Makefile
> >>>>>>> index d7182d6bd8..e3233fd7ac 100644
> >>>>>>> --- a/libavformat/Makefile
> >>>>>>> +++ b/libavformat/Makefile
> >>>>>>> @@ -660,6 +660,8 @@ OBJS-$(CONFIG_SRTP_PROTOCOL)             +=
> >>>>>> srtpproto.o srtp.o
> >>>>>>>  OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
> >>>>>>>  OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o
> tee_common.o
> >>>>>>>  OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
> >>>>>>> +OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
> >>>>>>> +OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
> >>>>>>>  TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
> >>>>>>>  TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
> >>>>>>>  TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
> >>>>>>> diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
> >>>>>>> new file mode 100644
> >>>>>>> index 0000000000..1a039589c0
> >>>>>>> --- /dev/null
> >>>>>>> +++ b/libavformat/ipfsgateway.c
> >>>>>>> @@ -0,0 +1,309 @@
> >>>>>>> +/*
> >>>>>>> + * IPFS and IPNS protocol support through IPFS Gateway.
> >>>>>>> + * Copyright (c) 2022 Mark Gaiser
> >>>>>>> + *
> >>>>>>> + * This file is part of FFmpeg.
> >>>>>>> + *
> >>>>>>> + * FFmpeg is free software; you can redistribute it and/or
> >>>>>>> + * modify it under the terms of the GNU Lesser General Public
> >>>>>>> + * License as published by the Free Software Foundation; either
> >>>>>>> + * version 2.1 of the License, or (at your option) any later
> >> version.
> >>>>>>> + *
> >>>>>>> + * FFmpeg is distributed in the hope that it will be useful,
> >>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> GNU
> >>>>>>> + * Lesser General Public License for more details.
> >>>>>>> + *
> >>>>>>> + * You should have received a copy of the GNU Lesser General
> Public
> >>>>>>> + * License along with FFmpeg; if not, write to the Free Software
> >>>>>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> >>>>>> 02110-1301 USA
> >>>>>>> + */
> >>>>>>> +
> >>>>>>> +#include "avformat.h"
> >>>>>>> +#include "libavutil/avassert.h"
> >>>>>>
> >>>>>> Unused.
> >>>>>>
> >>>>>>> +#include "libavutil/avstring.h"
> >>>>>>> +#include "libavutil/internal.h"
> >>>>>>> +#include "libavutil/opt.h"
> >>>>>>> +#include "libavutil/tree.h"
> >>>>>>
> >>>>>> ?
> >>>>>>
> >>>>>
> >>>>> This whole include part can be cleaned up much more.
> >>>>> Just having these:
> >>>>> #include "libavutil/avstring.h"
> >>>>> #include "libavutil/opt.h"
> >>>>> #include "url.h"
> >>>>> #include <sys/stat.h>
> >>>>>
> >>>>> is enough.
> >>>>>
> >>>>>
> >>>>>>> +#include <fcntl.h>
> >>>>>>> +#if HAVE_IO_H
> >>>>>>> +#include <io.h>
> >>>>>>> +#endif
> >>>>>>> +#if HAVE_UNISTD_H
> >>>>>>> +#include <unistd.h>
> >>>>>>> +#endif
> >>>>>>> +#include "os_support.h"
> >>>>>>> +#include "url.h"
> >>>>>>> +#include <stdlib.h>
> >>>>>>> +#include <sys/stat.h>
> >>>>>>> +
> >>>>>>> +typedef struct IPFSGatewayContext {
> >>>>>>> +    AVClass *class;
> >>>>>>> +    URLContext *inner;
> >>>>>>> +    // Is filled by the -gateway argument and not changed after.
> >>>>>>> +    char *gateway;
> >>>>>>> +    // If the above gateway is non null, it will be copied into
> this
> >>>>>> buffer.
> >>>>>>> +    // Else this buffer will contain the auto detected gateway.
> >>>>>>> +    // In either case, the gateway to use will be in this buffer.
> >>>>>>> +    char gateway_buffer[PATH_MAX];
> >>>>>>> +} IPFSGatewayContext;
> >>>>>>> +
> >>>>>>> +// A best-effort way to find the IPFS gateway.
> >>>>>>> +// Only the most appropiate gateway is set. It's not actually
> >>>> requested
> >>>>>>> +// (http call) to prevent a potential slowdown in startup. A
> >> potential
> >>>>>> timeout
> >>>>>>> +// is handled by the HTTP protocol.
> >>>>>>> +static int populate_ipfs_gateway(URLContext *h)
> >>>>>>> +{
> >>>>>>> +    IPFSGatewayContext *c = h->priv_data;
> >>>>>>> +    char ipfs_full_data_folder[PATH_MAX];
> >>>>>>> +    char ipfs_gateway_file[PATH_MAX];
> >>>>>>> +    struct stat st;
> >>>>>>> +    int stat_ret = 0;
> >>>>>>> +    int ret = AVERROR(EINVAL);
> >>>>>>> +    FILE *gateway_file = NULL;
> >>>>>>> +
> >>>>>>> +    // Test $IPFS_GATEWAY.
> >>>>>>> +    if (getenv("IPFS_GATEWAY") != NULL) {
> >>>>>>> +        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer),
> >>>> "%s",
> >>>>>>> +                     getenv("IPFS_GATEWAY")) >=
> >>>>>> sizeof(c->gateway_buffer)) {
> >>>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY
> environment
> >>>>>> variable exceeds the maximum length. We allow a max of %zu
> >>>> characters\n",
> >>>>>> sizeof(c->gateway_buffer));
> >>>>>>> +            ret = AVERROR(EINVAL);
> >>>>>>> +            goto err;
> >>>>>>> +        }
> >>>>>>> +
> >>>>>>> +        ret = 1;
> >>>>>>> +        goto err;
> >>>>>>> +    } else
> >>>>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
> >>>>>>> +
> >>>>>>> +    // We need to know the IPFS folder to - eventually - read the
> >>>>>> contents of
> >>>>>>> +    // the "gateway" file which would tell us the gateway to use.
> >>>>>>> +    if (getenv("IPFS_PATH") == NULL) {
> >>>>>>> +        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
> >>>>>>> +
> >>>>>>> +        // Try via the home folder.
> >>>>>>> +        if (getenv("HOME") == NULL) {
> >>>>>>> +            av_log(h, AV_LOG_WARNING, "$HOME appears to be
> >> empty.\n");
> >>>>>>> +            ret = AVERROR(EINVAL);
> >>>>>>> +            goto err;
> >>>>>>> +        }
> >>>>>>> +
> >>>>>>> +        // Verify the composed path fits.
> >>>>>>> +        if (snprintf(ipfs_full_data_folder,
> >>>>>> sizeof(ipfs_full_data_folder),
> >>>>>>> +                     "%s/.ipfs/", getenv("HOME")) >=
> >>>>>> sizeof(ipfs_full_data_folder)) {
> >>>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds
> >> the
> >>>>>> max path length (%zu)\n", sizeof(ipfs_full_data_folder));
> >>>>>>> +            ret = AVERROR(EINVAL);
> >>>>>>> +            goto err;
> >>>>>>> +        }
> >>>>>>> +
> >>>>>>> +        // Stat the folder.
> >>>>>>> +        // It should exist in a default IPFS setup when run as
> local
> >>>>>> user.
> >>>>>>> +#ifndef _WIN32
> >>>>>>> +        stat_ret = stat(ipfs_full_data_folder, &st);
> >>>>>>> +#else
> >>>>>>> +        stat_ret = win32_stat(ipfs_full_data_folder, &st);
> >>>>>>> +#endif
> >>>>>>> +        if (stat_ret < 0) {
> >>>>>>> +            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We
> >>>>>> tried:\n");
> >>>>>>> +            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was
> >>>> empty.\n");
> >>>>>>> +            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s)
> >> which
> >>>>>> doesn't exist.\n", ipfs_full_data_folder);
> >>>>>>> +            ret = AVERROR(ENOENT);
> >>>>>>> +            goto err;
> >>>>>>> +        }
> >>>>>>> +    } else {
> >>>>>>> +        if (snprintf(ipfs_full_data_folder,
> >>>>>> sizeof(ipfs_full_data_folder), "%s",
> >>>>>>> +                 getenv("IPFS_PATH")) >=
> >>>> sizeof(ipfs_full_data_folder))
> >>>>>> {
> >>>>>>> +            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment
> >>>>>> variable exceeds the maximum length. We allow a max of %zu
> >>>> characters\n",
> >>>>>> sizeof(c->gateway_buffer));
> >>>>>>> +            ret = AVERROR(EINVAL);
> >>>>>>> +            goto err;
> >>>>>>> +        }
> >>>>>>> +
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    // Copy the fully composed gateway path into
> ipfs_gateway_file.
> >>>>>>> +    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file),
> >>>>>> "%sgateway",
> >>>>>>> +                 ipfs_full_data_folder) >=
> >> sizeof(ipfs_gateway_file))
> >>>> {
> >>>>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path
> >> exceeds
> >>>>>> the max path length (%zu)\n", sizeof(ipfs_gateway_file));
> >>>>>>> +        ret = AVERROR(ENOENT);
> >>>>>>> +        goto err;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    // Get the contents of the gateway file.
> >>>>>>> +    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
> >>>>>>> +    if (!gateway_file) {
> >>>>>>> +        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full
> uri:
> >>>> %s)
> >>>>>> doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
> >>>>>>> +        ret = AVERROR(ENOENT);
> >>>>>>> +        goto err;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    // Read a single line (fgets stops at new line mark).
> >>>>>>> +    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1,
> >>>>>> gateway_file);
> >>>>>>> +
> >>>>>>> +    // Replace the last char with \0
> >>>>>>> +    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
> >>
> >> Btw: fgets normally zero-terminates the buffer (i.e. it reads at most
> >> (sizeof(c->gateway_buffer) - 1) - 1 characters from the stream and adds
> >> a \0. But on a read error it doesn't do this, instead the contents of
> >> the buffer are indeterminate and so should not be used. Adding a zero at
> >> the very end of gateway_buffer does not help to enforce this.
> >
> >
> >> fgets returns NULL on read error and if nothing could be read due to
> >> immediate EOF; so you should check for that.
> >> (Btw: A protocol's context is zeroed initially, so
> >> c->gateway_buffer[sizeof(c->gateway_buffer) - 1] is already zero,
> >> because no one ever wrote anything else there.)
> >>
> >
> > Nice one! Nobody caught that before.
> > So checking on NULL for fgets and removing the line to add a 0 at the end
> > is the appropriate fix here I assume.
> >
>
> Yes.
>
> > Changed to what I assumed. Please correct me if I'm wrong.
> >
> >
> >>
> >>>>>>> +
> >>>>>>> +    // Replace first occurence of end of line with \0
> >>>>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
> >>>>>>> +    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
> >>>>>>
> >>>>>> If the buffer contains both \r and \n and the first \n precedes the
> >>>>>> first \r, then the above zeroes both the first \r and the first \n.
> If
> >>>>>> it is enough to only zero the first newline, then this can be
> >> simplified
> >>>>>> to "c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;".
> >>>>>>
> >>>>>
> >>>>> It used to be in your suggested simplified approach.
> >>>>> It was then suggested to split it because of new line differences
> >>>>> on different platforms. This was supposed to catch "everything".
> >>>>>
> >>>>> I prefer to keep this as-is now.
> >>>>>
> >>>>
> >>>> Is it intended to use what is between the first \n and the first \r
> >>>> lateron although this data will be after the '\0' after this? That
> would
> >>>> be very weird.
> >>>>
> >>>
> >>> You have to elaborate on that as that sounds really wrong to me.
> >>>
> >>
> >> c->gateway_buffer[strcspn(c->gateway_buffer, "\r\n")] = 0;
> >> overwrites the first \r or \n (whichever is first) with '\0' (if any;
> >> otherwise it overwrites the terminating \0 with \0.
> >>
> >> c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
> >> c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
> >> also does this, but in case that the string originally contained both a
> >> \r and a \n with the first \n preceding the first \r it would also zero
> >> the first \r; this position is after the first terminating \0 (those at
> >> the position where the first \n was), so it appeared to me that you
> >> intended to use what is after the first \0. Because that is the only
> >> difference between the approach with two calls to strcspn and the one
> >> with one call to strcspn.
> >>
> >> But then I read that this has been suggested to you and found the mail
> >> (https://ffmpeg.org/pipermail/ffmpeg-devel/2022-February/292590.html)
> >> this referred to. There seems to be a misunderstanding in the semantics
> >> of strcspn (most likely a confusion with strstr):
> >> strcspn(c->gateway_buffer, "\r\n") works with single \r and single \n,
> >> there need not be a single "\r\n" substring. Both methods would produce
> >> strings that compare equal according to strcmp().
> >>
> >>
> > Thank you for your elaborate explanation here!
> > But now I'm not sure what the resolution - if any - should be?
> >
>
> Just use strcspn(c->gateway_buffer, "\r\n"). It works.
>
> >
> > So i leave it as "%zu". Check.
> > I did also grep on the entire ffmpeg source for this. Both seems to be
> used
> > in various places though granted, SIZE_SPECIFIER is used more.
> >
>
> Btw: %zu usage in the vaapi files is fine given that this API is not
> available on Windows so that limitations of Windows' libc are
> irrelevant. If we discount these, usage becomes very loopsided.
>
> >>
> >>> - Directly contradicts earlier feedback
> >>
> >> I don't agree with that given that SIZE_SPECIFIER is basically just a
> >> wrapper around zu.
> >>
> >
> > My mistake, it doesn't contradict it. Sorry for that.
> >
> >>
> >>> - And due to the above gives me a feeling of wasting time
> >>> - Fine to change it but I hope there isn't another person popping up
> >> with a
> >>> question to change it yet again....
> >>>
> >>>
> >>>>>
> >>>>>
> >>>>>>> +            ret = AVERROR(EINVAL);
> >>>>>>> +            goto err;
> >>>>>>> +        }
> >>>>>>> +    } else {
> >>>>>>> +        // Populate the IPFS gateway if we have any.
> >>>>>>> +        // If not, inform the user how to properly set one.
> >>>>>>> +        ret = populate_ipfs_gateway(h);
> >>>>>>> +
> >>>>>>> +        if (ret < 1) {
> >>>>>>> +            // We fallback on dweb.link (managed by Protocol
> Labs).
> >>>>>>> +            snprintf(c->gateway_buffer,
> sizeof(c->gateway_buffer), "
> >>>>>> https://dweb.link");
> >>>>>>> +
> >>>>>>> +            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be
> >>>>>> running. You’re now using the public gateway at dweb.link.\n");
> >>>>>>> +            av_log(h, AV_LOG_INFO, "Installing IPFS locally is
> >>>>>> recommended to improve performance and reliability, and not share
> all
> >>>> your
> >>>>>> activity with a single IPFS gateway.\n");
> >>>>>>> +            av_log(h, AV_LOG_INFO, "There are multiple options to
> >>>>>> define this gateway.\n");
> >>>>>>> +            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway
> >>>>>> param, without a trailing slash: -gateway <url>.\n");
> >>>>>>> +            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY
> >>>>>> environment variable with the full HTTP URL to the gateway without
> >>>> trailing
> >>>>>> forward slash.\n");
> >>>>>>> +            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH
> >>>> environment
> >>>>>> variable and point it to the IPFS data path - this is typically
> >>>> ~/.ipfs\n");
> >>>>>>
> >>>>>> All those AV_LOG_INFO can be combined which has the advantage that
> the
> >>>>>> logs can't be teared apart (which they can now if something else
> logs
> >> at
> >>>>>> the same time); furthermore, this would also decrease codesize.
> >>>>>>
> >>>>>
> >>>>> Do you have an example of where that's happening?
> >>>>>
> >>>>
> >>>> It can happen any time you have multiple threads using av_log at the
> >>>> same time. Given that your statements are supposed to be full lines,
> it
> >>>> is not that bad if it happens here, but it is nevertheless suboptimal.
> >>>>
> >>>
> >>> I meant a multi-line av_log example ;) But yeah, I get how threading
> >> could
> >>> screw this up.
> >>>
> >>
> >> I meant something like:
> >> av_log(h, AV_LOG_INFO, "Installing IPFS locally is recommended to
> >> improve performance and reliability, and not share all your activity
> >> with a single IPFS gateway.\n"
> >>        "There are multiple options to define this gateway.\n"
> >>        "1. Call ffmpeg with a gateway param, without a trailing slash:
> >> -gateway <url>.\n"
> >>        "2. Define an $IPFS_GATEWAY environment variable with the full
> >> HTTP URL to the gateway without trailing forward slash.\n"
> >>        "3. Define an $IPFS_PATH environment variable and point it to the
> >> IPFS data path - this is typically ~/.ipfs\n");
> >>
> >> (These lines are actually way too long.)
> >>
> >
> > Fixed as multiline.
> > I do like to keep the content of the lines though.
>
> I did not say that you need to change the actual content of the lines;
> you just should split it into more lines in the source code (no need to
> add any more newlines in the actual string!) in order to more adhere to
> the (soft) 80 character line limit.
>

;)

I changed it to be more in line with the 80 character limit. Some lines are
still outside it though.
Again for readability, this isn't working in its favor... But then again,
it's only annoying when you change the text.
Something that shouldn't be needed anymore as that's about final.

Thank you so much for your time and help!
I'll send V11 of this patch round shortly!


>
> - Andreas
>
diff mbox series

Patch

diff --git a/configure b/configure
index e4d36aa639..55af90957a 100755
--- a/configure
+++ b/configure
@@ -3579,6 +3579,8 @@  udp_protocol_select="network"
 udplite_protocol_select="network"
 unix_protocol_deps="sys_un_h"
 unix_protocol_select="network"
+ipfs_protocol_select="https_protocol"
+ipns_protocol_select="https_protocol"
 
 # external library protocols
 libamqp_protocol_deps="librabbitmq"
diff --git a/doc/protocols.texi b/doc/protocols.texi
index d207df0b52..7c9c0a4808 100644
--- a/doc/protocols.texi
+++ b/doc/protocols.texi
@@ -2025,5 +2025,35 @@  decoding errors.
 
 @end table
 
+@section ipfs
+
+InterPlanetary File System (IPFS) protocol support. One can access files stored 
+on the IPFS network through so called gateways. Those are http(s) endpoints.
+This protocol wraps the IPFS native protocols (ipfs:// and ipns://) to be send 
+to such a gateway. Users can (and should) host their own node which means this 
+protocol will use your local machine gateway to access files on the IPFS network.
+
+If a user doesn't have a node of their own then the public gateway dweb.link is 
+used by default.
+
+You can use this protocol in 2 ways. Using IPFS:
+@example
+ffplay ipfs://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
+@end example
+
+Or the IPNS protocol (IPNS is mutable IPFS):
+@example
+ffplay ipns://QmbGtJg23skhvFmu9mJiePVByhfzu5rwo74MEkVDYAmF5T
+@end example
+
+You can also change the gateway to be used:
+
+@table @option
+
+@item gateway
+Defines the gateway to use. When nothing is provided the protocol will first try 
+your local gateway. If that fails dweb.link will be used.
+
+@end table
 
 @c man end PROTOCOLS
diff --git a/libavformat/Makefile b/libavformat/Makefile
index d7182d6bd8..e3233fd7ac 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -660,6 +660,8 @@  OBJS-$(CONFIG_SRTP_PROTOCOL)             += srtpproto.o srtp.o
 OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
 OBJS-$(CONFIG_TEE_PROTOCOL)              += teeproto.o tee_common.o
 OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
+OBJS-$(CONFIG_IPFS_PROTOCOL)             += ipfsgateway.o
+OBJS-$(CONFIG_IPNS_PROTOCOL)             += ipfsgateway.o
 TLS-OBJS-$(CONFIG_GNUTLS)                += tls_gnutls.o
 TLS-OBJS-$(CONFIG_LIBTLS)                += tls_libtls.o
 TLS-OBJS-$(CONFIG_MBEDTLS)               += tls_mbedtls.o
diff --git a/libavformat/ipfsgateway.c b/libavformat/ipfsgateway.c
new file mode 100644
index 0000000000..1a039589c0
--- /dev/null
+++ b/libavformat/ipfsgateway.c
@@ -0,0 +1,309 @@ 
+/*
+ * IPFS and IPNS protocol support through IPFS Gateway.
+ * Copyright (c) 2022 Mark Gaiser
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+#include "libavutil/tree.h"
+#include <fcntl.h>
+#if HAVE_IO_H
+#include <io.h>
+#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include "os_support.h"
+#include "url.h"
+#include <stdlib.h>
+#include <sys/stat.h>
+
+typedef struct IPFSGatewayContext {
+    AVClass *class;
+    URLContext *inner;
+    // Is filled by the -gateway argument and not changed after.
+    char *gateway;
+    // If the above gateway is non null, it will be copied into this buffer.
+    // Else this buffer will contain the auto detected gateway.
+    // In either case, the gateway to use will be in this buffer.
+    char gateway_buffer[PATH_MAX];
+} IPFSGatewayContext;
+
+// A best-effort way to find the IPFS gateway.
+// Only the most appropiate gateway is set. It's not actually requested
+// (http call) to prevent a potential slowdown in startup. A potential timeout
+// is handled by the HTTP protocol.
+static int populate_ipfs_gateway(URLContext *h)
+{
+    IPFSGatewayContext *c = h->priv_data;
+    char ipfs_full_data_folder[PATH_MAX];
+    char ipfs_gateway_file[PATH_MAX];
+    struct stat st;
+    int stat_ret = 0;
+    int ret = AVERROR(EINVAL);
+    FILE *gateway_file = NULL;
+
+    // Test $IPFS_GATEWAY.
+    if (getenv("IPFS_GATEWAY") != NULL) {
+        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
+                     getenv("IPFS_GATEWAY")) >= sizeof(c->gateway_buffer)) {
+            av_log(h, AV_LOG_WARNING, "The IPFS_GATEWAY environment variable exceeds the maximum length. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+
+        ret = 1;
+        goto err;
+    } else
+        av_log(h, AV_LOG_DEBUG, "$IPFS_GATEWAY is empty.\n");
+
+    // We need to know the IPFS folder to - eventually - read the contents of
+    // the "gateway" file which would tell us the gateway to use.
+    if (getenv("IPFS_PATH") == NULL) {
+        av_log(h, AV_LOG_DEBUG, "$IPFS_PATH is empty.\n");
+
+        // Try via the home folder.
+        if (getenv("HOME") == NULL) {
+            av_log(h, AV_LOG_WARNING, "$HOME appears to be empty.\n");
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+
+        // Verify the composed path fits.
+        if (snprintf(ipfs_full_data_folder, sizeof(ipfs_full_data_folder),
+                     "%s/.ipfs/", getenv("HOME")) >= sizeof(ipfs_full_data_folder)) {
+            av_log(h, AV_LOG_WARNING, "The IPFS data path exceeds the max path length (%zu)\n", sizeof(ipfs_full_data_folder));
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+
+        // Stat the folder.
+        // It should exist in a default IPFS setup when run as local user.
+#ifndef _WIN32
+        stat_ret = stat(ipfs_full_data_folder, &st);
+#else
+        stat_ret = win32_stat(ipfs_full_data_folder, &st);
+#endif
+        if (stat_ret < 0) {
+            av_log(h, AV_LOG_INFO, "Unable to find IPFS folder. We tried:\n");
+            av_log(h, AV_LOG_INFO, "- $IPFS_PATH, which was empty.\n");
+            av_log(h, AV_LOG_INFO, "- $HOME/.ipfs (full uri: %s) which doesn't exist.\n", ipfs_full_data_folder);
+            ret = AVERROR(ENOENT);
+            goto err;
+        }
+    } else {
+        if (snprintf(ipfs_full_data_folder, sizeof(ipfs_full_data_folder), "%s",
+                 getenv("IPFS_PATH")) >= sizeof(ipfs_full_data_folder)) {
+            av_log(h, AV_LOG_WARNING, "The IPFS_PATH environment variable exceeds the maximum length. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+
+    }
+
+    // Copy the fully composed gateway path into ipfs_gateway_file.
+    if (snprintf(ipfs_gateway_file, sizeof(ipfs_gateway_file), "%sgateway",
+                 ipfs_full_data_folder) >= sizeof(ipfs_gateway_file)) {
+        av_log(h, AV_LOG_WARNING, "The IPFS gateway file path exceeds the max path length (%zu)\n", sizeof(ipfs_gateway_file));
+        ret = AVERROR(ENOENT);
+        goto err;
+    }
+
+    // Get the contents of the gateway file.
+    gateway_file = av_fopen_utf8(ipfs_gateway_file, "r");
+    if (!gateway_file) {
+        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri: %s) doesn't exist. Is the gateway enabled?\n", ipfs_gateway_file);
+        ret = AVERROR(ENOENT);
+        goto err;
+    }
+
+    // Read a single line (fgets stops at new line mark).
+    fgets(c->gateway_buffer, sizeof(c->gateway_buffer) - 1, gateway_file);
+
+    // Replace the last char with \0
+    c->gateway_buffer[sizeof(c->gateway_buffer) - 1] = 0;
+
+    // Replace first occurence of end of line with \0
+    c->gateway_buffer[strcspn(c->gateway_buffer, "\r")] = 0;
+    c->gateway_buffer[strcspn(c->gateway_buffer, "\n")] = 0;
+
+    // If strlen finds anything longer then 0 characters then we have a
+    // potential gateway url.
+    if (strlen(c->gateway_buffer) < 1) {
+        av_log(h, AV_LOG_WARNING, "The IPFS gateway file (full uri: %s) appears to be empty. Is the gateway started?\n", ipfs_gateway_file);
+        ret = AVERROR(EILSEQ);
+        goto err;
+    } else {
+        // We're done, the c->gateway_buffer has something that looks valid.
+        ret = 1;
+        goto err;
+    }
+
+err:
+    if (gateway_file)
+        fclose(gateway_file);
+
+    return ret;
+}
+
+static int translate_ipfs_to_http(URLContext *h, const char *uri,
+                                  int flags, AVDictionary **options)
+{
+    const char *ipfs_cid;
+    char *fulluri = NULL;
+    int ret;
+    IPFSGatewayContext *c = h->priv_data;
+
+    // Test for ipfs://, ipfs:, ipns:// and ipns:. This prefix is stripped from
+    // the string leaving just the CID in ipfs_cid.
+    int is_ipfs = av_stristart(uri, "ipfs://", &ipfs_cid);
+    int is_ipns = av_stristart(uri, "ipns://", &ipfs_cid);
+
+    // We must have either ipns or ipfs.
+    if (!is_ipfs && !is_ipns) {
+        ret = AVERROR(EINVAL);
+        av_log(h, AV_LOG_WARNING, "Unsupported url %s\n", uri);
+        goto err;
+    }
+
+    // If the CID has a length greater then 0 then we assume we have a proper working one.
+    // It could still be wrong but in that case the gateway should save us and
+    // ruturn a 403 error. The http protocol handles this.
+    if (strlen(ipfs_cid) < 1) {
+        av_log(h, AV_LOG_WARNING, "A CID must be provided.\n");
+        ret = AVERROR(EILSEQ);
+        goto err;
+    }
+
+    // Populate c->gateway_buffer with whatever is in c->gateway
+    if (c->gateway != NULL) {
+        if (snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "%s",
+                     c->gateway) >= sizeof(c->gateway_buffer)) {
+            av_log(h, AV_LOG_WARNING, "The -gateway parameter is too long. We allow a max of %zu characters\n", sizeof(c->gateway_buffer));
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+    } else {
+        // Populate the IPFS gateway if we have any.
+        // If not, inform the user how to properly set one.
+        ret = populate_ipfs_gateway(h);
+
+        if (ret < 1) {
+            // We fallback on dweb.link (managed by Protocol Labs).
+            snprintf(c->gateway_buffer, sizeof(c->gateway_buffer), "https://dweb.link");
+
+            av_log(h, AV_LOG_WARNING, "IPFS does not appear to be running. You’re now using the public gateway at dweb.link.\n");
+            av_log(h, AV_LOG_INFO, "Installing IPFS locally is recommended to improve performance and reliability, and not share all your activity with a single IPFS gateway.\n");
+            av_log(h, AV_LOG_INFO, "There are multiple options to define this gateway.\n");
+            av_log(h, AV_LOG_INFO, "1. Call ffmpeg with a gateway param, without a trailing slash: -gateway <url>.\n");
+            av_log(h, AV_LOG_INFO, "2. Define an $IPFS_GATEWAY environment variable with the full HTTP URL to the gateway without trailing forward slash.\n");
+            av_log(h, AV_LOG_INFO, "3. Define an $IPFS_PATH environment variable and point it to the IPFS data path - this is typically ~/.ipfs\n");
+        }
+    }
+
+    // Test if the gateway starts with either http:// or https://
+    if (av_stristart(c->gateway_buffer, "http://", NULL) == 0
+        && av_stristart(c->gateway_buffer, "https://", NULL) == 0) {
+        av_log(h, AV_LOG_WARNING, "The gateway URL didn't start with http:// or https:// and is therefore invalid.\n");
+        ret = AVERROR(EILSEQ);
+        goto err;
+    }
+
+    // Concatenate the url.
+    // This ends up with something like: http://localhost:8080/ipfs/Qm.....
+    // The format of "%s%s%s%s" is the following:
+    // 1st %s = The gateway.
+    // 2nd %s = If the gateway didn't end in a slash, add a "/". Otherwise it's an empty string
+    // 3rd %s = Either ipns/ or ipfs/.
+    // 4th %s = The IPFS CID (Qm..., bafy..., ...).
+    fulluri = av_asprintf("%s%s%s%s",
+                          c->gateway_buffer,
+                          (c->gateway_buffer[strlen(c->gateway_buffer) - 1] == '/') ? "" : "/",
+                          (is_ipns) ? "ipns/" : "ipfs/",
+                          ipfs_cid);
+
+    // Pass the URL back to FFMpeg's protocol handler.
+    if ((ret = ffurl_open_whitelist(&c->inner, fulluri, flags,
+                                    &h->interrupt_callback, options,
+                                    h->protocol_whitelist,
+                                    h->protocol_blacklist, h))
+        < 0) {
+        av_log(h, AV_LOG_WARNING, "Unable to open resource: %s\n", fulluri);
+        goto err;
+    }
+
+err:
+    av_free(fulluri);
+    return ret;
+}
+
+static int ipfs_read(URLContext *h, unsigned char *buf, int size)
+{
+    IPFSGatewayContext *c = h->priv_data;
+    return ffurl_read(c->inner, buf, size);
+}
+
+static int64_t ipfs_seek(URLContext *h, int64_t pos, int whence)
+{
+    IPFSGatewayContext *c = h->priv_data;
+    return ffurl_seek(c->inner, pos, whence);
+}
+
+static int ipfs_close(URLContext *h)
+{
+    IPFSGatewayContext *c = h->priv_data;
+    return ffurl_closep(&c->inner);
+}
+
+#define OFFSET(x) offsetof(IPFSGatewayContext, x)
+
+static const AVOption options[] = {
+    {"gateway", "The gateway to ask for IPFS data.", OFFSET(gateway), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM},
+    {NULL},
+};
+
+static const AVClass ipfs_context_class = {
+    .class_name   = "IPFS",
+    .item_name    = av_default_item_name,
+    .option       = options,
+    .version      = LIBAVUTIL_VERSION_INT,
+};
+
+const URLProtocol ff_ipfs_protocol = {
+    .name             = "ipfs",
+    .url_open2        = translate_ipfs_to_http,
+    .url_read         = ipfs_read,
+    .url_seek         = ipfs_seek,
+    .url_close        = ipfs_close,
+    .priv_data_size   = sizeof(IPFSGatewayContext),
+    .priv_data_class  = &ipfs_context_class,
+};
+
+const URLProtocol ff_ipns_protocol = {
+    .name             = "ipns",
+    .url_open2        = translate_ipfs_to_http,
+    .url_read         = ipfs_read,
+    .url_seek         = ipfs_seek,
+    .url_close        = ipfs_close,
+    .priv_data_size   = sizeof(IPFSGatewayContext),
+    .priv_data_class  = &ipfs_context_class,
+};
diff --git a/libavformat/protocols.c b/libavformat/protocols.c
index d07563cd0c..6ee62a598a 100644
--- a/libavformat/protocols.c
+++ b/libavformat/protocols.c
@@ -71,6 +71,8 @@  extern const URLProtocol ff_libsrt_protocol;
 extern const URLProtocol ff_libssh_protocol;
 extern const URLProtocol ff_libsmbclient_protocol;
 extern const URLProtocol ff_libzmq_protocol;
+extern const URLProtocol ff_ipfs_protocol;
+extern const URLProtocol ff_ipns_protocol;
 
 #include "libavformat/protocol_list.c"