[FFmpeg-devel,1/2] avformat/aviobuf: add ff_read_line_to_bprint and ff_read_line_to_bprint_overwrite functions

Submitted by Marton Balint on Feb. 10, 2018, 5:12 p.m.

Details

Message ID 20180210171206.29686-1-cus@passwd.hu
State New
Headers show

Commit Message

Marton Balint Feb. 10, 2018, 5:12 p.m.
To be able to read lines longer than a static buffer size.

Signed-off-by: Marton Balint <cus@passwd.hu>
---
 libavformat/aviobuf.c  | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 libavformat/internal.h | 26 ++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

Comments

Nicolas George Feb. 11, 2018, 1:43 p.m.
Marton Balint (2018-02-10):
> To be able to read lines longer than a static buffer size.
> 
> Signed-off-by: Marton Balint <cus@passwd.hu>
> ---
>  libavformat/aviobuf.c  | 46 ++++++++++++++++++++++++++++++++++++++++++++++
>  libavformat/internal.h | 26 ++++++++++++++++++++++++++
>  2 files changed, 72 insertions(+)
> 
> diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
> index 86eb6579f4..12cd73745d 100644
> --- a/libavformat/aviobuf.c
> +++ b/libavformat/aviobuf.c
> @@ -821,6 +821,52 @@ int ff_get_line(AVIOContext *s, char *buf, int maxlen)
>      return i;
>  }
>  
> +int64_t ff_read_line_to_bprint(AVIOContext *s, AVBPrint *bp)
> +{
> +    int len, end;
> +    int64_t read = 0;
> +    char tmp[1024];
> +    char c;
> +

> +    do {
> +        len = 0;
> +        do {
> +            c = avio_r8(s);
> +            end = (c == '\r' || c == '\n' || c == '\0');
> +            if (!end)
> +                tmp[len++] = c;
> +        } while (!end && len < sizeof(tmp));
> +        av_bprint_append_data(bp, tmp, len);
> +        read += len;
> +    } while (!end);

I think the code would be much simpler if you just call
av_bprint_chars() for each read character. The loop-within-loop makes
the logic a little hard to understand.

If speed is critic, then you could read directly into the bprint buffer,
but I do not think this is necessary.

> +
> +    if (c == '\r' && avio_r8(s) != '\n' && !avio_feof(s))
> +        avio_skip(s, -1);
> +
> +    if (!c && s->error)
> +        return s->error;
> +
> +    if (!c && !read && avio_feof(s))
> +        return AVERROR_EOF;
> +
> +    return read;
> +}
> +
> +int64_t ff_read_line_to_bprint_overwrite(AVIOContext *s, AVBPrint *bp)
> +{
> +    int64_t ret;
> +
> +    av_bprint_clear(bp);
> +    ret = ff_read_line_to_bprint(s, bp);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (!av_bprint_is_complete(bp))
> +        return AVERROR(ENOMEM);
> +
> +    return bp->len;
> +}
> +
>  int avio_get_str(AVIOContext *s, int maxlen, char *buf, int buflen)
>  {
>      int i;
> diff --git a/libavformat/internal.h b/libavformat/internal.h
> index 0d08576c29..2ac7e2e1a0 100644
> --- a/libavformat/internal.h
> +++ b/libavformat/internal.h
> @@ -299,6 +299,32 @@ void ff_put_v(AVIOContext *bc, uint64_t val);
>   */
>  int ff_get_line(AVIOContext *s, char *buf, int maxlen);
>  
> +/**
> + * Read a whole line of text from AVIOContext to an AVBPrint buffer. Stop
> + * reading after reaching a \\r, a \\n, a \\r\\n, a \\0 or EOF.  The line
> + * ending characters are NOT included in the buffer, but they are skipped on
> + * the input.
> + *
> + * @param s the read-only AVIOContext
> + * @param bp the AVBPrint buffer
> + * @return the length of the read line, not including the line endings,
> + *         negative on error.
> + */
> +int64_t ff_read_line_to_bprint(AVIOContext *s, AVBPrint *bp);
> +
> +/**
> + * Read a whole line of text from AVIOContext to an AVBPrint buffer overwriting
> + * its contents. Stop reading after reaching a \\r, a \\n, a \\r\\n, a \\0 or
> + * EOF. The line ending characters are NOT included in the buffer, but they
> + * are skipped on the input.
> + *
> + * @param s the read-only AVIOContext
> + * @param bp the AVBPrint buffer
> + * @return the length of the read line not including the line endings,
> + *         negative on error, or if the buffer becomes truncated.
> + */
> +int64_t ff_read_line_to_bprint_overwrite(AVIOContext *s, AVBPrint *bp);
> +
>  #define SPACE_CHARS " \t\r\n"
>  
>  /**

Looks reasonable except for the remark above.

Regards,
Marton Balint Feb. 11, 2018, 9:38 p.m.
On Sun, 11 Feb 2018, Nicolas George wrote:

> Marton Balint (2018-02-10):
>> To be able to read lines longer than a static buffer size.
>>
>> Signed-off-by: Marton Balint <cus@passwd.hu>
>> ---
>>  libavformat/aviobuf.c  | 46 ++++++++++++++++++++++++++++++++++++++++++++++
>>  libavformat/internal.h | 26 ++++++++++++++++++++++++++
>>  2 files changed, 72 insertions(+)
>>
>> diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
>> index 86eb6579f4..12cd73745d 100644
>> --- a/libavformat/aviobuf.c
>> +++ b/libavformat/aviobuf.c
>> @@ -821,6 +821,52 @@ int ff_get_line(AVIOContext *s, char *buf, int maxlen)
>>      return i;
>>  }
>>
>> +int64_t ff_read_line_to_bprint(AVIOContext *s, AVBPrint *bp)
>> +{
>> +    int len, end;
>> +    int64_t read = 0;
>> +    char tmp[1024];
>> +    char c;
>> +
>
>> +    do {
>> +        len = 0;
>> +        do {
>> +            c = avio_r8(s);
>> +            end = (c == '\r' || c == '\n' || c == '\0');
>> +            if (!end)
>> +                tmp[len++] = c;
>> +        } while (!end && len < sizeof(tmp));
>> +        av_bprint_append_data(bp, tmp, len);
>> +        read += len;
>> +    } while (!end);
>
> I think the code would be much simpler if you just call
> av_bprint_chars() for each read character. The loop-within-loop makes
> the logic a little hard to understand.
>
> If speed is critic, then you could read directly into the bprint buffer,
> but I do not think this is necessary.

It is a 500% speed improvement on a 260 MB line compared to using 
av_bprint_chars, so I'd rather leave it as is. I can add a comment saying 
"for performance reasons we fill a temporary buffer, and use av_bprint 
functions on chunks of data".

Regards,
Marton
Nicolas George Feb. 14, 2018, 12:52 p.m.
Marton Balint (2018-02-11):
> It is a 500% speed improvement on a 260 MB line compared to using
> av_bprint_chars, so I'd rather leave it as is. I can add a comment saying
> "for performance reasons we fill a temporary buffer, and use av_bprint
> functions on chunks of data".

This is assuming reading the text file is the only thing that happens.
In practice, the lines will be parsed, tokenized, with probably quite a
few mallocs, making the overhead negligible. And if performance were
really critic, reading the file whole and splitting in memory would
probably be best.

But of course, since the optimization is already written and gives a
significant difference on pure tests, I have no objection as is.

Regards,

Patch hide | download patch | download mbox

diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
index 86eb6579f4..12cd73745d 100644
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c
@@ -821,6 +821,52 @@  int ff_get_line(AVIOContext *s, char *buf, int maxlen)
     return i;
 }
 
+int64_t ff_read_line_to_bprint(AVIOContext *s, AVBPrint *bp)
+{
+    int len, end;
+    int64_t read = 0;
+    char tmp[1024];
+    char c;
+
+    do {
+        len = 0;
+        do {
+            c = avio_r8(s);
+            end = (c == '\r' || c == '\n' || c == '\0');
+            if (!end)
+                tmp[len++] = c;
+        } while (!end && len < sizeof(tmp));
+        av_bprint_append_data(bp, tmp, len);
+        read += len;
+    } while (!end);
+
+    if (c == '\r' && avio_r8(s) != '\n' && !avio_feof(s))
+        avio_skip(s, -1);
+
+    if (!c && s->error)
+        return s->error;
+
+    if (!c && !read && avio_feof(s))
+        return AVERROR_EOF;
+
+    return read;
+}
+
+int64_t ff_read_line_to_bprint_overwrite(AVIOContext *s, AVBPrint *bp)
+{
+    int64_t ret;
+
+    av_bprint_clear(bp);
+    ret = ff_read_line_to_bprint(s, bp);
+    if (ret < 0)
+        return ret;
+
+    if (!av_bprint_is_complete(bp))
+        return AVERROR(ENOMEM);
+
+    return bp->len;
+}
+
 int avio_get_str(AVIOContext *s, int maxlen, char *buf, int buflen)
 {
     int i;
diff --git a/libavformat/internal.h b/libavformat/internal.h
index 0d08576c29..2ac7e2e1a0 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -299,6 +299,32 @@  void ff_put_v(AVIOContext *bc, uint64_t val);
  */
 int ff_get_line(AVIOContext *s, char *buf, int maxlen);
 
+/**
+ * Read a whole line of text from AVIOContext to an AVBPrint buffer. Stop
+ * reading after reaching a \\r, a \\n, a \\r\\n, a \\0 or EOF.  The line
+ * ending characters are NOT included in the buffer, but they are skipped on
+ * the input.
+ *
+ * @param s the read-only AVIOContext
+ * @param bp the AVBPrint buffer
+ * @return the length of the read line, not including the line endings,
+ *         negative on error.
+ */
+int64_t ff_read_line_to_bprint(AVIOContext *s, AVBPrint *bp);
+
+/**
+ * Read a whole line of text from AVIOContext to an AVBPrint buffer overwriting
+ * its contents. Stop reading after reaching a \\r, a \\n, a \\r\\n, a \\0 or
+ * EOF. The line ending characters are NOT included in the buffer, but they
+ * are skipped on the input.
+ *
+ * @param s the read-only AVIOContext
+ * @param bp the AVBPrint buffer
+ * @return the length of the read line not including the line endings,
+ *         negative on error, or if the buffer becomes truncated.
+ */
+int64_t ff_read_line_to_bprint_overwrite(AVIOContext *s, AVBPrint *bp);
+
 #define SPACE_CHARS " \t\r\n"
 
 /**