Message ID | 20210626165657.2066-1-jamrial@gmail.com |
---|---|
State | Accepted |
Headers | show |
Series | [FFmpeg-devel,v2] avformat: add a concat protocol that takes a line break delimited list of resources | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
James Almer (12021-06-26): > Suggested-by: ffmpeg@fb.com > Signed-off-by: James Almer <jamrial@gmail.com> > --- > Updated documentation, and line breaks can now be part of the filename. > > doc/protocols.texi | 33 +++++++++ > libavformat/Makefile | 1 + > libavformat/concat.c | 146 ++++++++++++++++++++++++++++++++++++++++ > libavformat/protocols.c | 1 + > 4 files changed, 181 insertions(+) > > diff --git a/doc/protocols.texi b/doc/protocols.texi > index ccdfb6e439..11de674225 100644 > --- a/doc/protocols.texi > +++ b/doc/protocols.texi > @@ -215,6 +215,39 @@ ffplay concat:split1.mpeg\|split2.mpeg\|split3.mpeg > Note that you may need to escape the character "|" which is special for > many shells. > > +@section concatf > + > +Physical concatenation protocol using a line break delimited list of > +resources. > + > +Read and seek from many resources in sequence as if they were > +a unique resource. > + > +A URL accepted by this protocol has the syntax: > +@example > +concatf:@var{URL} > +@end example > + > +where @var{URL} is the url containing a line break delimited list of > +resources to be concatenated, each one possibly specifying a distinct > +protocol. > + > +For example to read a sequence of files @file{split1.mpeg}, > +@file{split2.mpeg}, @file{split3.mpeg} listed in separate lines within > +a file @file{split.txt} with @command{ffplay} use the command: > +@example > +ffplay concatf:split.txt > +@end example > +Where @file{split.txt} contains the lines: > +@example > +split1.mpeg > +split2.mpeg > +split3.mpeg > +@end example > + > +Note that if any of the entries in the list contain a line break as part > +of their name, you'll need to escape it with a preceding "\" character. This is not detailed and accurate enough. And it should be a link to the common description of our escaping syntax anyway. > + > @section crypto > > AES-encrypted stream reading protocol. > diff --git a/libavformat/Makefile b/libavformat/Makefile > index c9ef564523..caca95802a 100644 > --- a/libavformat/Makefile > +++ b/libavformat/Makefile > @@ -616,6 +616,7 @@ OBJS-$(CONFIG_APPLEHTTP_PROTOCOL) += hlsproto.o > OBJS-$(CONFIG_BLURAY_PROTOCOL) += bluray.o > OBJS-$(CONFIG_CACHE_PROTOCOL) += cache.o > OBJS-$(CONFIG_CONCAT_PROTOCOL) += concat.o > +OBJS-$(CONFIG_CONCATF_PROTOCOL) += concat.o > OBJS-$(CONFIG_CRYPTO_PROTOCOL) += crypto.o > OBJS-$(CONFIG_DATA_PROTOCOL) += data_uri.o > OBJS-$(CONFIG_FFRTMPCRYPT_PROTOCOL) += rtmpcrypt.o rtmpdigest.o rtmpdh.o > diff --git a/libavformat/concat.c b/libavformat/concat.c > index 278afd997d..b66e3b9e01 100644 > --- a/libavformat/concat.c > +++ b/libavformat/concat.c > @@ -22,9 +22,11 @@ > */ > > #include "libavutil/avstring.h" > +#include "libavutil/bprint.h" > #include "libavutil/mem.h" > > #include "avformat.h" > +#include "avio_internal.h" > #include "url.h" > > #define AV_CAT_SEPARATOR "|" > @@ -56,6 +58,7 @@ static av_cold int concat_close(URLContext *h) > return err < 0 ? -1 : 0; > } > > +#if CONFIG_CONCAT_PROTOCOL > static av_cold int concat_open(URLContext *h, const char *uri, int flags) > { > char *node_uri = NULL; > @@ -124,6 +127,7 @@ static av_cold int concat_open(URLContext *h, const char *uri, int flags) > data->total_size = total_size; > return err; > } > +#endif > > static int concat_read(URLContext *h, unsigned char *buf, int size) > { > @@ -188,6 +192,7 @@ static int64_t concat_seek(URLContext *h, int64_t pos, int whence) > return result; > } > > +#if CONFIG_CONCAT_PROTOCOL > const URLProtocol ff_concat_protocol = { > .name = "concat", > .url_open = concat_open, > @@ -197,3 +202,144 @@ const URLProtocol ff_concat_protocol = { > .priv_data_size = sizeof(struct concat_data), > .default_whitelist = "concat,file,subfile", > }; > +#endif > + > +#if CONFIG_CONCATF_PROTOCOL > +// Custom ff_read_line_to_bprint() implementation where line breaks can be > +// part of the line being read if escaped. > +static int64_t read_line_to_bprint(AVIOContext *s, AVBPrint *bp) > +{ > + int len, end; > + int64_t read = 0; > + char tmp[1024]; > + char c; > + > + do { > + len = 0; > + do { > + char escape = c = avio_r8(s); > + if (c == '\\') > + c = avio_r8(s); > + end = (c == '\r' || c == '\n' || c == '\0'); > + if (end && escape == '\\') { > + if (c != '\0') { > + tmp[len++] = c; > + end = 0; > + } else > + tmp[len++] = escape; > + } else if (!end) { > + if (escape == '\\') { > + tmp[len++] = escape; > + avio_skip(s, -1); > + } else > + tmp[len++] = c; > + } > + } while (!end && len < sizeof(tmp)); > + av_bprint_append_data(bp, tmp, len); > + read += len; > + } while (!end); > + > + if (c == '\r' && avio_r8(s) != '\n' && !avio_feof(s)) > + avio_skip(s, -1); > + > + if (!c && s->error) > + return s->error; > + > + if (!c && !read && avio_feof(s)) > + return AVERROR_EOF; > + > + return read; > +} Re-implementing yet another de-escaping and splitting function is a terrible idea, I am strongly against it. It would be easier if we had a good string API already. Barring that, I think you need to load the whole file and use the existing de-escaping functions. > + > +static av_cold int concatf_open(URLContext *h, const char *uri, int flags) > +{ > + AVBPrint bp; > + struct concat_data *data = h->priv_data; > + struct concat_nodes *nodes = NULL; > + AVIOContext *in = NULL; > + URLContext *uc; > + int64_t size, total_size = 0; > + unsigned int nodes_size = 0; > + size_t i = 0; > + int err = 0; > + > + if (!av_strstart(uri, "concatf:", &uri)) { > + av_log(h, AV_LOG_ERROR, "URL %s lacks prefix\n", uri); > + return AVERROR(EINVAL); > + } > + > + /* handle input */ > + if (!*uri) > + return AVERROR(ENOENT); > + > + err = ffio_open_whitelist(&in, uri, AVIO_FLAG_READ, &h->interrupt_callback, > + NULL, h->protocol_whitelist, h->protocol_blacklist); > + if (err < 0) > + return err; > + > + av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED); > + > + for (i = 0; !avio_feof(in); i++) { > + size_t len = i; > + > + av_bprint_clear(&bp); > + if ((err = read_line_to_bprint(in, &bp)) <= 0) { > + if (err == 0 && i == 0) > + err = AVERROR_INVALIDDATA; > + else if (err == AVERROR_EOF) > + err = 0; > + break; > + } > + > + if (++len == SIZE_MAX / sizeof(*nodes)) { > + err = AVERROR(ENAMETOOLONG); > + break; > + } > + > + /* creating URLContext */ > + err = ffurl_open_whitelist(&uc, bp.str, flags, > + &h->interrupt_callback, NULL, h->protocol_whitelist, h->protocol_blacklist, h); > + if (err < 0) > + break; > + > + /* creating size */ > + if ((size = ffurl_size(uc)) < 0) { > + ffurl_close(uc); > + err = AVERROR(ENOSYS); > + break; > + } > + > + nodes = av_fast_realloc(data->nodes, &nodes_size, sizeof(*nodes) * len); > + if (!nodes) { > + ffurl_close(uc); > + err = AVERROR(ENOMEM); > + break; > + } > + data->nodes = nodes; > + > + /* assembling */ > + data->nodes[i].uc = uc; > + data->nodes[i].size = size; > + total_size += size; > + } > + avio_closep(&in); > + av_bprint_finalize(&bp, NULL); > + data->length = i; > + > + if (err < 0) > + concat_close(h); > + > + data->total_size = total_size; > + return err; > +} > + > +const URLProtocol ff_concatf_protocol = { > + .name = "concatf", > + .url_open = concatf_open, > + .url_read = concat_read, > + .url_seek = concat_seek, > + .url_close = concat_close, > + .priv_data_size = sizeof(struct concat_data), > + .default_whitelist = "concatf,concat,file,subfile", > +}; > +#endif > diff --git a/libavformat/protocols.c b/libavformat/protocols.c > index 4b6b1c8e98..7f08f151b6 100644 > --- a/libavformat/protocols.c > +++ b/libavformat/protocols.c > @@ -27,6 +27,7 @@ extern const URLProtocol ff_async_protocol; > extern const URLProtocol ff_bluray_protocol; > extern const URLProtocol ff_cache_protocol; > extern const URLProtocol ff_concat_protocol; > +extern const URLProtocol ff_concatf_protocol; > extern const URLProtocol ff_crypto_protocol; > extern const URLProtocol ff_data_protocol; > extern const URLProtocol ff_ffrtmpcrypt_protocol; Regards,
On 6/27/2021 3:26 PM, Nicolas George wrote: > James Almer (12021-06-26): >> Suggested-by: ffmpeg@fb.com >> Signed-off-by: James Almer <jamrial@gmail.com> >> --- >> Updated documentation, and line breaks can now be part of the filename. >> >> doc/protocols.texi | 33 +++++++++ >> libavformat/Makefile | 1 + >> libavformat/concat.c | 146 ++++++++++++++++++++++++++++++++++++++++ >> libavformat/protocols.c | 1 + >> 4 files changed, 181 insertions(+) >> >> diff --git a/doc/protocols.texi b/doc/protocols.texi >> index ccdfb6e439..11de674225 100644 >> --- a/doc/protocols.texi >> +++ b/doc/protocols.texi >> @@ -215,6 +215,39 @@ ffplay concat:split1.mpeg\|split2.mpeg\|split3.mpeg >> Note that you may need to escape the character "|" which is special for >> many shells. >> >> +@section concatf >> + >> +Physical concatenation protocol using a line break delimited list of >> +resources. >> + >> +Read and seek from many resources in sequence as if they were >> +a unique resource. >> + >> +A URL accepted by this protocol has the syntax: >> +@example >> +concatf:@var{URL} >> +@end example >> + >> +where @var{URL} is the url containing a line break delimited list of >> +resources to be concatenated, each one possibly specifying a distinct >> +protocol. >> + >> +For example to read a sequence of files @file{split1.mpeg}, >> +@file{split2.mpeg}, @file{split3.mpeg} listed in separate lines within >> +a file @file{split.txt} with @command{ffplay} use the command: >> +@example >> +ffplay concatf:split.txt >> +@end example >> +Where @file{split.txt} contains the lines: >> +@example >> +split1.mpeg >> +split2.mpeg >> +split3.mpeg >> +@end example >> + > >> +Note that if any of the entries in the list contain a line break as part >> +of their name, you'll need to escape it with a preceding "\" character. > > This is not detailed and accurate enough. And it should be a link to the > common description of our escaping syntax anyway. How is it not accurate enough? If the file contains the following foobar.h264 foo\ bar.h264 It will read the resources foobar.h264 and foo bar.h264 But without that backslash, it will try to read foo and bar.h264 as separate resources because it interpreted the line break as a delimiter character. I can add a line to the documentation stating that no other character needs to be escaped if that will make it more clear. > >> + >> @section crypto >> >> AES-encrypted stream reading protocol. >> diff --git a/libavformat/Makefile b/libavformat/Makefile >> index c9ef564523..caca95802a 100644 >> --- a/libavformat/Makefile >> +++ b/libavformat/Makefile >> @@ -616,6 +616,7 @@ OBJS-$(CONFIG_APPLEHTTP_PROTOCOL) += hlsproto.o >> OBJS-$(CONFIG_BLURAY_PROTOCOL) += bluray.o >> OBJS-$(CONFIG_CACHE_PROTOCOL) += cache.o >> OBJS-$(CONFIG_CONCAT_PROTOCOL) += concat.o >> +OBJS-$(CONFIG_CONCATF_PROTOCOL) += concat.o >> OBJS-$(CONFIG_CRYPTO_PROTOCOL) += crypto.o >> OBJS-$(CONFIG_DATA_PROTOCOL) += data_uri.o >> OBJS-$(CONFIG_FFRTMPCRYPT_PROTOCOL) += rtmpcrypt.o rtmpdigest.o rtmpdh.o >> diff --git a/libavformat/concat.c b/libavformat/concat.c >> index 278afd997d..b66e3b9e01 100644 >> --- a/libavformat/concat.c >> +++ b/libavformat/concat.c >> @@ -22,9 +22,11 @@ >> */ >> >> #include "libavutil/avstring.h" >> +#include "libavutil/bprint.h" >> #include "libavutil/mem.h" >> >> #include "avformat.h" >> +#include "avio_internal.h" >> #include "url.h" >> >> #define AV_CAT_SEPARATOR "|" >> @@ -56,6 +58,7 @@ static av_cold int concat_close(URLContext *h) >> return err < 0 ? -1 : 0; >> } >> >> +#if CONFIG_CONCAT_PROTOCOL >> static av_cold int concat_open(URLContext *h, const char *uri, int flags) >> { >> char *node_uri = NULL; >> @@ -124,6 +127,7 @@ static av_cold int concat_open(URLContext *h, const char *uri, int flags) >> data->total_size = total_size; >> return err; >> } >> +#endif >> >> static int concat_read(URLContext *h, unsigned char *buf, int size) >> { >> @@ -188,6 +192,7 @@ static int64_t concat_seek(URLContext *h, int64_t pos, int whence) >> return result; >> } >> >> +#if CONFIG_CONCAT_PROTOCOL >> const URLProtocol ff_concat_protocol = { >> .name = "concat", >> .url_open = concat_open, >> @@ -197,3 +202,144 @@ const URLProtocol ff_concat_protocol = { >> .priv_data_size = sizeof(struct concat_data), >> .default_whitelist = "concat,file,subfile", >> }; >> +#endif >> + > >> +#if CONFIG_CONCATF_PROTOCOL >> +// Custom ff_read_line_to_bprint() implementation where line breaks can be >> +// part of the line being read if escaped. >> +static int64_t read_line_to_bprint(AVIOContext *s, AVBPrint *bp) >> +{ >> + int len, end; >> + int64_t read = 0; >> + char tmp[1024]; >> + char c; >> + >> + do { >> + len = 0; >> + do { >> + char escape = c = avio_r8(s); >> + if (c == '\\') >> + c = avio_r8(s); >> + end = (c == '\r' || c == '\n' || c == '\0'); >> + if (end && escape == '\\') { >> + if (c != '\0') { >> + tmp[len++] = c; >> + end = 0; >> + } else >> + tmp[len++] = escape; >> + } else if (!end) { >> + if (escape == '\\') { >> + tmp[len++] = escape; >> + avio_skip(s, -1); >> + } else >> + tmp[len++] = c; >> + } >> + } while (!end && len < sizeof(tmp)); >> + av_bprint_append_data(bp, tmp, len); >> + read += len; >> + } while (!end); >> + >> + if (c == '\r' && avio_r8(s) != '\n' && !avio_feof(s)) >> + avio_skip(s, -1); >> + >> + if (!c && s->error) >> + return s->error; >> + >> + if (!c && !read && avio_feof(s)) >> + return AVERROR_EOF; >> + >> + return read; >> +} > > Re-implementing yet another de-escaping and splitting function is a > terrible idea, I am strongly against it. > > It would be easier if we had a good string API already. Barring that, I > think you need to load the whole file and use the existing de-escaping > functions. The existing de-escaping functions will just remove backslashes, and the line break character will be parsed as a delimiter, which is why i wrote the above to ensure it is read as part of the filename. The contents of the file are meant to be taken as is, and if you want the delimiter character to be part of the filename, you need to let the parser know about it. Every other character in the text file doesn't need to be escaped since this is not the command line where they could be interpreted as something else. > >> + >> +static av_cold int concatf_open(URLContext *h, const char *uri, int flags) >> +{ >> + AVBPrint bp; >> + struct concat_data *data = h->priv_data; >> + struct concat_nodes *nodes = NULL; >> + AVIOContext *in = NULL; >> + URLContext *uc; >> + int64_t size, total_size = 0; >> + unsigned int nodes_size = 0; >> + size_t i = 0; >> + int err = 0; >> + >> + if (!av_strstart(uri, "concatf:", &uri)) { >> + av_log(h, AV_LOG_ERROR, "URL %s lacks prefix\n", uri); >> + return AVERROR(EINVAL); >> + } >> + >> + /* handle input */ >> + if (!*uri) >> + return AVERROR(ENOENT); >> + >> + err = ffio_open_whitelist(&in, uri, AVIO_FLAG_READ, &h->interrupt_callback, >> + NULL, h->protocol_whitelist, h->protocol_blacklist); >> + if (err < 0) >> + return err; >> + >> + av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED); >> + >> + for (i = 0; !avio_feof(in); i++) { >> + size_t len = i; >> + >> + av_bprint_clear(&bp); >> + if ((err = read_line_to_bprint(in, &bp)) <= 0) { >> + if (err == 0 && i == 0) >> + err = AVERROR_INVALIDDATA; >> + else if (err == AVERROR_EOF) >> + err = 0; >> + break; >> + } >> + >> + if (++len == SIZE_MAX / sizeof(*nodes)) { >> + err = AVERROR(ENAMETOOLONG); >> + break; >> + } >> + >> + /* creating URLContext */ >> + err = ffurl_open_whitelist(&uc, bp.str, flags, >> + &h->interrupt_callback, NULL, h->protocol_whitelist, h->protocol_blacklist, h); >> + if (err < 0) >> + break; >> + >> + /* creating size */ >> + if ((size = ffurl_size(uc)) < 0) { >> + ffurl_close(uc); >> + err = AVERROR(ENOSYS); >> + break; >> + } >> + >> + nodes = av_fast_realloc(data->nodes, &nodes_size, sizeof(*nodes) * len); >> + if (!nodes) { >> + ffurl_close(uc); >> + err = AVERROR(ENOMEM); >> + break; >> + } >> + data->nodes = nodes; >> + >> + /* assembling */ >> + data->nodes[i].uc = uc; >> + data->nodes[i].size = size; >> + total_size += size; >> + } >> + avio_closep(&in); >> + av_bprint_finalize(&bp, NULL); >> + data->length = i; >> + >> + if (err < 0) >> + concat_close(h); >> + >> + data->total_size = total_size; >> + return err; >> +} >> + >> +const URLProtocol ff_concatf_protocol = { >> + .name = "concatf", >> + .url_open = concatf_open, >> + .url_read = concat_read, >> + .url_seek = concat_seek, >> + .url_close = concat_close, >> + .priv_data_size = sizeof(struct concat_data), >> + .default_whitelist = "concatf,concat,file,subfile", >> +}; >> +#endif >> diff --git a/libavformat/protocols.c b/libavformat/protocols.c >> index 4b6b1c8e98..7f08f151b6 100644 >> --- a/libavformat/protocols.c >> +++ b/libavformat/protocols.c >> @@ -27,6 +27,7 @@ extern const URLProtocol ff_async_protocol; >> extern const URLProtocol ff_bluray_protocol; >> extern const URLProtocol ff_cache_protocol; >> extern const URLProtocol ff_concat_protocol; >> +extern const URLProtocol ff_concatf_protocol; >> extern const URLProtocol ff_crypto_protocol; >> extern const URLProtocol ff_data_protocol; >> extern const URLProtocol ff_ffrtmpcrypt_protocol; > > Regards, > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
James Almer (12021-06-27): > How is it not accurate enough? Which characters must be escaped? Is it legal to escape another character? Does it support single quotes like the other escaping functions? Can you use byte or Unicode numbers? Various escaping syntax have subtle differences about these points, and this is something users need to know about, especially for scripts. Anyway, our escaping syntax should be documented once well, with links to it everywhere where needed, the description must not be duplicated all over the place. > I can add a line to the documentation stating that no other character needs > to be escaped if that will make it more clear. Make it a link to the unique good documentation of escaping. If we do not have an unique good documentation of escaping, the it needs to be written, and this is a good occasion. But I think we have it. > The existing de-escaping functions will just remove backslashes, and the > line break character will be parsed as a delimiter, which is why i wrote the > above to ensure it is read as part of the filename. > > The contents of the file are meant to be taken as is, and if you want the > delimiter character to be part of the filename, you need to let the parser > know about it. Every other character in the text file doesn't need to be > escaped since this is not the command line where they could be interpreted > as something else. I know what the existing de-escaping functions do, and I know what they can be used for, and I repeat: you can the parsing correctly with them by reading the whole file at once. But as it is, this code duplication is a big no. Regards,
On 6/27/2021 4:00 PM, Nicolas George wrote: > James Almer (12021-06-27): >> How is it not accurate enough? > > Which characters must be escaped? Is it legal to escape another > character? Does it support single quotes like the other escaping > functions? Can you use byte or Unicode numbers? Only line breaks need to be escaped. I said as much and even suggested to make it more explicit in the documentation. Every other character will be read and added verbatim into a string used as uri for a given resource that ffurl_open_whitelist() will then handle. Create a text file with the following four entries foo\ bar.h264 foo'bar.h264 foo\bar.h264 foobar.h264 Where only the line break is escaped to ensure it's not interpreted as a resource name delimiter, and they will all be read and open if they exist. > > Various escaping syntax have subtle differences about these points, and > this is something users need to know about, especially for scripts. > > Anyway, our escaping syntax should be documented once well, with links > to it everywhere where needed, the description must not be duplicated > all over the place. > >> I can add a line to the documentation stating that no other character needs >> to be escaped if that will make it more clear. > > Make it a link to the unique good documentation of escaping. There's no need for any escaping other than the delimiter character, so wouldn't it be misleading linking to a document that will make you write your entries in the file in a way that will not work? Said documentation will tell you that if you want to load foo'bar.h264 you will need to write it as foo\'bar.h264, but that's not true here and will only make it attempt to load a file with all those characters as part of its name. > > If we do not have an unique good documentation of escaping, the it needs > to be written, and this is a good occasion. But I think we have it. > >> The existing de-escaping functions will just remove backslashes, and the >> line break character will be parsed as a delimiter, which is why i wrote the >> above to ensure it is read as part of the filename. >> >> The contents of the file are meant to be taken as is, and if you want the >> delimiter character to be part of the filename, you need to let the parser >> know about it. Every other character in the text file doesn't need to be >> escaped since this is not the command line where they could be interpreted >> as something else. > > I know what the existing de-escaping functions do, and I know what they > can be used for, and I repeat: you can the parsing correctly with them > by reading the whole file at once. > > But as it is, this code duplication is a big no. Wont your request to use escaping functions result in the need for the entries in the text file to start pointlessly escaping characters like quotes, backslashes and others for no reason, as if this were bash? The protocol right now supports line breaks as part of the filename if you escape it like you asked me to support, alongside every other character without doing anything for them. What you're now asking from me is to require it to parse more characters than necessary as if this was the command line, when there's no reason to. > > Regards, > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
James Almer (12021-06-27): > Only line breaks need to be escaped. I said as much and even suggested to > make it more explicit in the documentation. > Every other character will be read and added verbatim into a string used as > uri for a given resource that ffurl_open_whitelist() will then handle. > > Create a text file with the following four entries > > foo\ > bar.h264 > foo'bar.h264 > foo\bar.h264 > foobar.h264 > > Where only the line break is escaped to ensure it's not interpreted as a > resource name delimiter, and they will all be read and open if they exist. I do not need you to explain this to me. > There's no need for any escaping other than the delimiter character, so > wouldn't it be misleading linking to a document that will make you write > your entries in the file in a way that will not work? The code and documentation must be compatible, of course. > Said documentation will tell you that if you want to load foo'bar.h264 you > will need to write it as foo\'bar.h264, but that's not true here and will That should be true, see below. > Wont your request to use escaping functions result in the need for the > entries in the text file to start pointlessly escaping characters like > quotes, backslashes and others for no reason, as if this were bash? My request results in using the same syntax we have developed and tuned for the rest of the project. Having several subtly different rules for escaping in a single project is an abominable idea. Regards,
On 6/27/2021 4:40 PM, Nicolas George wrote: > James Almer (12021-06-27): >> Only line breaks need to be escaped. I said as much and even suggested to >> make it more explicit in the documentation. >> Every other character will be read and added verbatim into a string used as >> uri for a given resource that ffurl_open_whitelist() will then handle. >> >> Create a text file with the following four entries >> >> foo\ >> bar.h264 >> foo'bar.h264 >> foo\bar.h264 >> foobar.h264 >> >> Where only the line break is escaped to ensure it's not interpreted as a >> resource name delimiter, and they will all be read and open if they exist. > > I do not need you to explain this to me. You asked what characters needed to be escaped, i answered and figured giving an example would help. If not for you, for someone else reading this thread. I know well strings are your field of expertise and hardly mine. Don't wrongly assume the other person has ill intentions. > >> There's no need for any escaping other than the delimiter character, so >> wouldn't it be misleading linking to a document that will make you write >> your entries in the file in a way that will not work? > > The code and documentation must be compatible, of course. > >> Said documentation will tell you that if you want to load foo'bar.h264 you >> will need to write it as foo\'bar.h264, but that's not true here and will > > That should be true, see below. > >> Wont your request to use escaping functions result in the need for the >> entries in the text file to start pointlessly escaping characters like >> quotes, backslashes and others for no reason, as if this were bash? > > My request results in using the same syntax we have developed and tuned > for the rest of the project. > > Having several subtly different rules for escaping in a single project > is an abominable idea. It's literally having a list of raw filenames in a file, where the delimiter character can be part of the filename and as such needs to be handled. I'm not writing a new language, or new escaping rules. I don't see how it's so unacceptable. Can you point me in the right direction then? What functions should i use? How would the entries in the text file need to be written like in order for said functions to write all valid characters correctly in the output buffer? How will line breaks as part of the file name need be handled? A backslash before an actual line break like in this version, or will this mean the entries will need to look like something out of bash? If the latter, will these functions convert the sequence of the characters \ and n into a single 0x0A byte in the output buffer? > > Regards, > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
James Almer (12021-06-27): > You asked No, I did not ASK: I told what needs to be present in the documentation. > I know well strings are your field of expertise and hardly mine. Don't > wrongly assume the other person has ill intentions. I am not assuming any ill intent from you. Why would you think that? This is the second time in a few days that you accuse me of personal stuff like that. Please re-calibrate your opinion of me. > It's literally having a list of raw filenames in a file, where the delimiter > character can be part of the filename and as such needs to be handled. I'm > not writing a new language, or new escaping rules. I don't see how it's so > unacceptable. What you just described IS precisely a new set of escaping rules. It is very very simple, but it is a set of rules. A new one, while we already have one. And of course, you are re-implementing it, which in itself cannot be accepted. > Can you point me in the right direction then? What functions should i use? I already pointed you in the right direction: read the whole file, and then use the standard parsing and de-escaping functions. Your use of av_get_token() in the first patches was good. The flaw was using them on separate lines instead of the whole file: you ended up with both ff_read_line_to_bprint_overwrite() and av_get_token() handling the new line characters, in a subtly different way. You needed to get rid of one of them, the one that does only a small part of the work, to keep the one that can do the whole work. Regards,
On 6/27/2021 5:16 PM, Nicolas George wrote: > James Almer (12021-06-27): >> You asked > > No, I did not ASK: I told what needs to be present in the documentation. Your first paragraph started with "Which characters must be escaped?" among other questions. The part you quoted from my reply was an answer to that, including the example. What is the documentation about escaping syntax i should link to, for that matter? The documentation for the other concat protocol simply states that special characters must be escaped, but no reference to any external document or section within our documentation about it. > >> I know well strings are your field of expertise and hardly mine. Don't >> wrongly assume the other person has ill intentions. > > I am not assuming any ill intent from you. Why would you think that? I probably read too much into what you said. Sorry. > This is the second time in a few days that you accuse me of personal > stuff like that. Please re-calibrate your opinion of me. > >> It's literally having a list of raw filenames in a file, where the delimiter >> character can be part of the filename and as such needs to be handled. I'm >> not writing a new language, or new escaping rules. I don't see how it's so >> unacceptable. > > What you just described IS precisely a new set of escaping rules. It is > very very simple, but it is a set of rules. A new one, while we already > have one. > > And of course, you are re-implementing it, which in itself cannot be > accepted. > >> Can you point me in the right direction then? What functions should i use? > > I already pointed you in the right direction: read the whole file, and > then use the standard parsing and de-escaping functions. > > Your use of av_get_token() in the first patches was good. The flaw was > using them on separate lines instead of the whole file: you ended up > with both ff_read_line_to_bprint_overwrite() and av_get_token() handling > the new line characters, in a subtly different way. You needed to get > rid of one of them, the one that does only a small part of the work, to > keep the one that can do the whole work. > > Regards, > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
James Almer (12021-06-27): > Your first paragraph started with "Which characters must be escaped?" Yes, but it was a follow-up to "the doc is not detailed enough". I trim my mails, because it makes reading easier, but the context still matters. Should I leave more of it, even for conversations over the course of just a few hours? > What is the documentation about escaping syntax i should link to, for that > matter? The documentation for the other concat protocol simply states that > special characters must be escaped, but no reference to any external > document or section within our documentation about it. doc/utils.texi starts with: # @chapter Syntax # @c man begin SYNTAX # # This section documents the syntax and formats employed by the FFmpeg # libraries and tools. # # @anchor{quoting_and_escaping} # @section Quoting and escaping # # FFmpeg adopts the following quoting and escaping mechanism, unless # explicitly specified. The following rules are applied: It looks like exactly what we want. > I probably read too much into what you said. Sorry. No problem. I hope we can learn to not be irritated by each other's style. Regards,
diff --git a/doc/protocols.texi b/doc/protocols.texi index ccdfb6e439..11de674225 100644 --- a/doc/protocols.texi +++ b/doc/protocols.texi @@ -215,6 +215,39 @@ ffplay concat:split1.mpeg\|split2.mpeg\|split3.mpeg Note that you may need to escape the character "|" which is special for many shells. +@section concatf + +Physical concatenation protocol using a line break delimited list of +resources. + +Read and seek from many resources in sequence as if they were +a unique resource. + +A URL accepted by this protocol has the syntax: +@example +concatf:@var{URL} +@end example + +where @var{URL} is the url containing a line break delimited list of +resources to be concatenated, each one possibly specifying a distinct +protocol. + +For example to read a sequence of files @file{split1.mpeg}, +@file{split2.mpeg}, @file{split3.mpeg} listed in separate lines within +a file @file{split.txt} with @command{ffplay} use the command: +@example +ffplay concatf:split.txt +@end example +Where @file{split.txt} contains the lines: +@example +split1.mpeg +split2.mpeg +split3.mpeg +@end example + +Note that if any of the entries in the list contain a line break as part +of their name, you'll need to escape it with a preceding "\" character. + @section crypto AES-encrypted stream reading protocol. diff --git a/libavformat/Makefile b/libavformat/Makefile index c9ef564523..caca95802a 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -616,6 +616,7 @@ OBJS-$(CONFIG_APPLEHTTP_PROTOCOL) += hlsproto.o OBJS-$(CONFIG_BLURAY_PROTOCOL) += bluray.o OBJS-$(CONFIG_CACHE_PROTOCOL) += cache.o OBJS-$(CONFIG_CONCAT_PROTOCOL) += concat.o +OBJS-$(CONFIG_CONCATF_PROTOCOL) += concat.o OBJS-$(CONFIG_CRYPTO_PROTOCOL) += crypto.o OBJS-$(CONFIG_DATA_PROTOCOL) += data_uri.o OBJS-$(CONFIG_FFRTMPCRYPT_PROTOCOL) += rtmpcrypt.o rtmpdigest.o rtmpdh.o diff --git a/libavformat/concat.c b/libavformat/concat.c index 278afd997d..b66e3b9e01 100644 --- a/libavformat/concat.c +++ b/libavformat/concat.c @@ -22,9 +22,11 @@ */ #include "libavutil/avstring.h" +#include "libavutil/bprint.h" #include "libavutil/mem.h" #include "avformat.h" +#include "avio_internal.h" #include "url.h" #define AV_CAT_SEPARATOR "|" @@ -56,6 +58,7 @@ static av_cold int concat_close(URLContext *h) return err < 0 ? -1 : 0; } +#if CONFIG_CONCAT_PROTOCOL static av_cold int concat_open(URLContext *h, const char *uri, int flags) { char *node_uri = NULL; @@ -124,6 +127,7 @@ static av_cold int concat_open(URLContext *h, const char *uri, int flags) data->total_size = total_size; return err; } +#endif static int concat_read(URLContext *h, unsigned char *buf, int size) { @@ -188,6 +192,7 @@ static int64_t concat_seek(URLContext *h, int64_t pos, int whence) return result; } +#if CONFIG_CONCAT_PROTOCOL const URLProtocol ff_concat_protocol = { .name = "concat", .url_open = concat_open, @@ -197,3 +202,144 @@ const URLProtocol ff_concat_protocol = { .priv_data_size = sizeof(struct concat_data), .default_whitelist = "concat,file,subfile", }; +#endif + +#if CONFIG_CONCATF_PROTOCOL +// Custom ff_read_line_to_bprint() implementation where line breaks can be +// part of the line being read if escaped. +static int64_t read_line_to_bprint(AVIOContext *s, AVBPrint *bp) +{ + int len, end; + int64_t read = 0; + char tmp[1024]; + char c; + + do { + len = 0; + do { + char escape = c = avio_r8(s); + if (c == '\\') + c = avio_r8(s); + end = (c == '\r' || c == '\n' || c == '\0'); + if (end && escape == '\\') { + if (c != '\0') { + tmp[len++] = c; + end = 0; + } else + tmp[len++] = escape; + } else if (!end) { + if (escape == '\\') { + tmp[len++] = escape; + avio_skip(s, -1); + } else + tmp[len++] = c; + } + } while (!end && len < sizeof(tmp)); + av_bprint_append_data(bp, tmp, len); + read += len; + } while (!end); + + if (c == '\r' && avio_r8(s) != '\n' && !avio_feof(s)) + avio_skip(s, -1); + + if (!c && s->error) + return s->error; + + if (!c && !read && avio_feof(s)) + return AVERROR_EOF; + + return read; +} + +static av_cold int concatf_open(URLContext *h, const char *uri, int flags) +{ + AVBPrint bp; + struct concat_data *data = h->priv_data; + struct concat_nodes *nodes = NULL; + AVIOContext *in = NULL; + URLContext *uc; + int64_t size, total_size = 0; + unsigned int nodes_size = 0; + size_t i = 0; + int err = 0; + + if (!av_strstart(uri, "concatf:", &uri)) { + av_log(h, AV_LOG_ERROR, "URL %s lacks prefix\n", uri); + return AVERROR(EINVAL); + } + + /* handle input */ + if (!*uri) + return AVERROR(ENOENT); + + err = ffio_open_whitelist(&in, uri, AVIO_FLAG_READ, &h->interrupt_callback, + NULL, h->protocol_whitelist, h->protocol_blacklist); + if (err < 0) + return err; + + av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED); + + for (i = 0; !avio_feof(in); i++) { + size_t len = i; + + av_bprint_clear(&bp); + if ((err = read_line_to_bprint(in, &bp)) <= 0) { + if (err == 0 && i == 0) + err = AVERROR_INVALIDDATA; + else if (err == AVERROR_EOF) + err = 0; + break; + } + + if (++len == SIZE_MAX / sizeof(*nodes)) { + err = AVERROR(ENAMETOOLONG); + break; + } + + /* creating URLContext */ + err = ffurl_open_whitelist(&uc, bp.str, flags, + &h->interrupt_callback, NULL, h->protocol_whitelist, h->protocol_blacklist, h); + if (err < 0) + break; + + /* creating size */ + if ((size = ffurl_size(uc)) < 0) { + ffurl_close(uc); + err = AVERROR(ENOSYS); + break; + } + + nodes = av_fast_realloc(data->nodes, &nodes_size, sizeof(*nodes) * len); + if (!nodes) { + ffurl_close(uc); + err = AVERROR(ENOMEM); + break; + } + data->nodes = nodes; + + /* assembling */ + data->nodes[i].uc = uc; + data->nodes[i].size = size; + total_size += size; + } + avio_closep(&in); + av_bprint_finalize(&bp, NULL); + data->length = i; + + if (err < 0) + concat_close(h); + + data->total_size = total_size; + return err; +} + +const URLProtocol ff_concatf_protocol = { + .name = "concatf", + .url_open = concatf_open, + .url_read = concat_read, + .url_seek = concat_seek, + .url_close = concat_close, + .priv_data_size = sizeof(struct concat_data), + .default_whitelist = "concatf,concat,file,subfile", +}; +#endif diff --git a/libavformat/protocols.c b/libavformat/protocols.c index 4b6b1c8e98..7f08f151b6 100644 --- a/libavformat/protocols.c +++ b/libavformat/protocols.c @@ -27,6 +27,7 @@ extern const URLProtocol ff_async_protocol; extern const URLProtocol ff_bluray_protocol; extern const URLProtocol ff_cache_protocol; extern const URLProtocol ff_concat_protocol; +extern const URLProtocol ff_concatf_protocol; extern const URLProtocol ff_crypto_protocol; extern const URLProtocol ff_data_protocol; extern const URLProtocol ff_ffrtmpcrypt_protocol;
Suggested-by: ffmpeg@fb.com Signed-off-by: James Almer <jamrial@gmail.com> --- Updated documentation, and line breaks can now be part of the filename. doc/protocols.texi | 33 +++++++++ libavformat/Makefile | 1 + libavformat/concat.c | 146 ++++++++++++++++++++++++++++++++++++++++ libavformat/protocols.c | 1 + 4 files changed, 181 insertions(+)