diff mbox

[FFmpeg-devel,v2,3/6] avcodec/cbs_vp9: Write frame data directly

Message ID 20191117073440.22718-4-andreas.rheinhardt@gmail.com
State Superseded
Headers show

Commit Message

Andreas Rheinhardt Nov. 17, 2019, 7:34 a.m. UTC
Writing a unit (always a frame) in cbs_vp9 used an intermediate buffer
to write the frame header followed by the frame data that was copied
into said buffer. Afterwards, the final buffer for the frame was
allocated and everything copied into this buffer. But it is trivial to
compute the needed size of the final buffer after having written the
header, so one can allocate the final buffer immediately and copy the
frame data directly into it.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
---
 libavcodec/cbs.c          |  4 ++++
 libavcodec/cbs_internal.h |  6 ++++--
 libavcodec/cbs_vp9.c      | 25 +++++++++++++++++++------
 3 files changed, 27 insertions(+), 8 deletions(-)

Comments

Mark Thompson Nov. 18, 2019, 12:04 a.m. UTC | #1
On 17/11/2019 07:34, Andreas Rheinhardt wrote:
> Writing a unit (always a frame) in cbs_vp9 used an intermediate buffer
> to write the frame header followed by the frame data that was copied
> into said buffer. Afterwards, the final buffer for the frame was
> allocated and everything copied into this buffer. But it is trivial to
> compute the needed size of the final buffer after having written the
> header, so one can allocate the final buffer immediately and copy the
> frame data directly into it.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
> ---
>  libavcodec/cbs.c          |  4 ++++
>  libavcodec/cbs_internal.h |  6 ++++--
>  libavcodec/cbs_vp9.c      | 25 +++++++++++++++++++------
>  3 files changed, 27 insertions(+), 8 deletions(-)
> 
> diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
> index 0badb192d9..9ad2641f6d 100644
> --- a/libavcodec/cbs.c
> +++ b/libavcodec/cbs.c
> @@ -306,6 +306,10 @@ static int cbs_write_unit_data(CodedBitstreamContext *ctx,
>      init_put_bits(&pbc, ctx->write_buffer, ctx->write_buffer_size);
>  
>      ret = ctx->codec->write_unit(ctx, unit, &pbc);
> +    if (ret == 1) {
> +        // write_unit has already finished the unit.
> +        return 0;
> +    }
>      if (ret < 0) {
>          if (ret == AVERROR(ENOSPC)) {
>              // Overflow.
> diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
> index 4c5a535ca6..5768baa9ca 100644
> --- a/libavcodec/cbs_internal.h
> +++ b/libavcodec/cbs_internal.h
> @@ -44,8 +44,10 @@ typedef struct CodedBitstreamType {
>      int (*read_unit)(CodedBitstreamContext *ctx,
>                       CodedBitstreamUnit *unit);
>  
> -    // Write the data bitstream from unit->content into pbc.
> -    // Return value AVERROR(ENOSPC) indicates that pbc was too small.
> +    // Write the data bitstream from unit->content into pbc or into unit->data.
> +    // Return value AVERROR(ENOSPC) indicates that pbc was too small;
> +    // 1 indicates that the unit has already been finished by write_unit
> +    // (i.e. unit->data and unit->data_ref have been allocated and filled).
>      int (*write_unit)(CodedBitstreamContext *ctx,
>                        CodedBitstreamUnit *unit,
>                        PutBitContext *pbc);
> diff --git a/libavcodec/cbs_vp9.c b/libavcodec/cbs_vp9.c
> index 42e4dcf5ac..bc074c4631 100644
> --- a/libavcodec/cbs_vp9.c
> +++ b/libavcodec/cbs_vp9.c
> @@ -526,6 +526,7 @@ static int cbs_vp9_write_unit(CodedBitstreamContext *ctx,
>                                PutBitContext *pbc)
>  {
>      VP9RawFrame *frame = unit->content;
> +    size_t data_size, header_size;
>      int err;
>  
>      err = cbs_vp9_write_frame(ctx, pbc, frame);
> @@ -535,16 +536,28 @@ static int cbs_vp9_write_unit(CodedBitstreamContext *ctx,
>      // Frame must be byte-aligned.
>      av_assert0(put_bits_count(pbc) % 8 == 0);
>  
> +    data_size = header_size = put_bits_count(pbc) / 8;
> +    unit->data_bit_padding = 0;
> +    flush_put_bits(pbc);
> +
>      if (frame->data) {
> -        if (frame->data_size > put_bits_left(pbc) / 8)
> -            return AVERROR(ENOSPC);
> +        if (frame->data_size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE
> +                                       - header_size)
> +            return AVERROR(ENOMEM);
>  
> -        flush_put_bits(pbc);
> -        memcpy(put_bits_ptr(pbc), frame->data, frame->data_size);
> -        skip_put_bytes(pbc, frame->data_size);
> +        data_size += frame->data_size;
>      }
>  
> -    return 0;
> +    err = ff_cbs_alloc_unit_data(ctx, unit, data_size);
> +    if (err < 0)
> +        return err;
> +
> +    memcpy(unit->data, pbc->buf, header_size);
> +
> +    if (frame->data)
> +        memcpy(unit->data + header_size, frame->data, frame->data_size);
> +
> +    return 1;
>  }
>  
>  static int cbs_vp9_assemble_fragment(CodedBitstreamContext *ctx,
> 

I feel like the special return is saying the API isn't quite right in this and the following patches.  Can we do better with something like a write_unit_header / write_unit_everything_else pair?  (Maybe the second of those can have a pointer argument indicating how many bytes it wants to write?)

- Mark
Andreas Rheinhardt Nov. 18, 2019, 8:08 a.m. UTC | #2
On Mon, Nov 18, 2019 at 1:05 AM Mark Thompson <sw@jkqxz.net> wrote:

> On 17/11/2019 07:34, Andreas Rheinhardt wrote:
> > Writing a unit (always a frame) in cbs_vp9 used an intermediate buffer
> > to write the frame header followed by the frame data that was copied
> > into said buffer. Afterwards, the final buffer for the frame was
> > allocated and everything copied into this buffer. But it is trivial to
> > compute the needed size of the final buffer after having written the
> > header, so one can allocate the final buffer immediately and copy the
> > frame data directly into it.
> >
> > Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
> > ---
> >  libavcodec/cbs.c          |  4 ++++
> >  libavcodec/cbs_internal.h |  6 ++++--
> >  libavcodec/cbs_vp9.c      | 25 +++++++++++++++++++------
> >  3 files changed, 27 insertions(+), 8 deletions(-)
> >
> > diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
> > index 0badb192d9..9ad2641f6d 100644
> > --- a/libavcodec/cbs.c
> > +++ b/libavcodec/cbs.c
> > @@ -306,6 +306,10 @@ static int
> cbs_write_unit_data(CodedBitstreamContext *ctx,
> >      init_put_bits(&pbc, ctx->write_buffer, ctx->write_buffer_size);
> >
> >      ret = ctx->codec->write_unit(ctx, unit, &pbc);
> > +    if (ret == 1) {
> > +        // write_unit has already finished the unit.
> > +        return 0;
> > +    }
> >      if (ret < 0) {
> >          if (ret == AVERROR(ENOSPC)) {
> >              // Overflow.
> > diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
> > index 4c5a535ca6..5768baa9ca 100644
> > --- a/libavcodec/cbs_internal.h
> > +++ b/libavcodec/cbs_internal.h
> > @@ -44,8 +44,10 @@ typedef struct CodedBitstreamType {
> >      int (*read_unit)(CodedBitstreamContext *ctx,
> >                       CodedBitstreamUnit *unit);
> >
> > -    // Write the data bitstream from unit->content into pbc.
> > -    // Return value AVERROR(ENOSPC) indicates that pbc was too small.
> > +    // Write the data bitstream from unit->content into pbc or into
> unit->data.
> > +    // Return value AVERROR(ENOSPC) indicates that pbc was too small;
> > +    // 1 indicates that the unit has already been finished by write_unit
> > +    // (i.e. unit->data and unit->data_ref have been allocated and
> filled).
> >      int (*write_unit)(CodedBitstreamContext *ctx,
> >                        CodedBitstreamUnit *unit,
> >                        PutBitContext *pbc);
> > diff --git a/libavcodec/cbs_vp9.c b/libavcodec/cbs_vp9.c
> > index 42e4dcf5ac..bc074c4631 100644
> > --- a/libavcodec/cbs_vp9.c
> > +++ b/libavcodec/cbs_vp9.c
> > @@ -526,6 +526,7 @@ static int cbs_vp9_write_unit(CodedBitstreamContext
> *ctx,
> >                                PutBitContext *pbc)
> >  {
> >      VP9RawFrame *frame = unit->content;
> > +    size_t data_size, header_size;
> >      int err;
> >
> >      err = cbs_vp9_write_frame(ctx, pbc, frame);
> > @@ -535,16 +536,28 @@ static int
> cbs_vp9_write_unit(CodedBitstreamContext *ctx,
> >      // Frame must be byte-aligned.
> >      av_assert0(put_bits_count(pbc) % 8 == 0);
> >
> > +    data_size = header_size = put_bits_count(pbc) / 8;
> > +    unit->data_bit_padding = 0;
> > +    flush_put_bits(pbc);
> > +
> >      if (frame->data) {
> > -        if (frame->data_size > put_bits_left(pbc) / 8)
> > -            return AVERROR(ENOSPC);
> > +        if (frame->data_size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE
> > +                                       - header_size)
> > +            return AVERROR(ENOMEM);
> >
> > -        flush_put_bits(pbc);
> > -        memcpy(put_bits_ptr(pbc), frame->data, frame->data_size);
> > -        skip_put_bytes(pbc, frame->data_size);
> > +        data_size += frame->data_size;
> >      }
> >
> > -    return 0;
> > +    err = ff_cbs_alloc_unit_data(ctx, unit, data_size);
> > +    if (err < 0)
> > +        return err;
> > +
> > +    memcpy(unit->data, pbc->buf, header_size);
> > +
> > +    if (frame->data)
> > +        memcpy(unit->data + header_size, frame->data, frame->data_size);
> > +
> > +    return 1;
> >  }
> >
> >  static int cbs_vp9_assemble_fragment(CodedBitstreamContext *ctx,
> >
>
> I feel like the special return is saying the API isn't quite right in this
> and the following patches.  Can we do better with something like a
> write_unit_header / write_unit_everything_else pair?  (Maybe the second of
> those can have a pointer argument indicating how many bytes it wants to
> write?)
>
> - Mark


I don't really understand your proposal: Are you suggesting to replace the
write_unit functions with pairs of functions (one pair for each
CodedBitstreamType)? If so, how does the generic code know which one of
these two to call? Or does it always call the first one first and then
maybe the second one? How does it know whether it should call the second
one or whether it should make a unit out of the content of the
PutBitContext if not by the return value (in which case there would be no
advantage to my proposal)?

And your pointer argument suggestion seems to indicate that writing
everything_else should proceed as follows: First, said function is called
and indicates how big the buffer has to be. Then it (or another function --
if write_unit_header is called first, it should have the pointer argument)
is called again and this time the buffer is ready (I presume the buffer is
allocated in the unit). And then it copies. Here are my objections to this:
1. It is more complicated than simply using the return value.
2. One would lose all the temporary variables when returning. (Admittedly,
this doesn't seem to be an issue with the current write_units functions).
3. If all the generic code does is calling ff_cbs_alloc_unit_data, then it
is just a complication; if it does more (I'm thinking of copying the
content of the PutBitContext into the beginning of the buffer), then it
isn't good either: Think of av1. Currently it writes the obu header, then
reserves space for the size field, then it writes the rest without the tile
data. Then it computes the obu size and how many bytes it needs for the
size field and it memoves the data written so far into the right position.
Then it copies the tile data (if present) into the right position in the
write buffer after which the whole unit is copied into a freshly allocated
buffer. If copying of everything written so far were automatic, then one
would have to perform the memove so that the beginning of the data is
correct. But this is unnecessary, because one can do the following (and
this is what I intend to do in an already 3/4 finished patch): Write the
unit without header or tile data. Find out how big the buffer needs to be,
allocate the buffer, write the header incl the size field into the
beginning of the buffer, copy the data from the PutBitContext directly
behind this and then copy the tile data.

Using the return value was actually a no-brainer for me (I even forgot to
mention this in the commit message). I don't see a solution Maybe using a
#define/enum constant would be more pleasing to you? How about
FF_CBS_UNIT_FINISHED?

- Andreas
diff mbox

Patch

diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
index 0badb192d9..9ad2641f6d 100644
--- a/libavcodec/cbs.c
+++ b/libavcodec/cbs.c
@@ -306,6 +306,10 @@  static int cbs_write_unit_data(CodedBitstreamContext *ctx,
     init_put_bits(&pbc, ctx->write_buffer, ctx->write_buffer_size);
 
     ret = ctx->codec->write_unit(ctx, unit, &pbc);
+    if (ret == 1) {
+        // write_unit has already finished the unit.
+        return 0;
+    }
     if (ret < 0) {
         if (ret == AVERROR(ENOSPC)) {
             // Overflow.
diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
index 4c5a535ca6..5768baa9ca 100644
--- a/libavcodec/cbs_internal.h
+++ b/libavcodec/cbs_internal.h
@@ -44,8 +44,10 @@  typedef struct CodedBitstreamType {
     int (*read_unit)(CodedBitstreamContext *ctx,
                      CodedBitstreamUnit *unit);
 
-    // Write the data bitstream from unit->content into pbc.
-    // Return value AVERROR(ENOSPC) indicates that pbc was too small.
+    // Write the data bitstream from unit->content into pbc or into unit->data.
+    // Return value AVERROR(ENOSPC) indicates that pbc was too small;
+    // 1 indicates that the unit has already been finished by write_unit
+    // (i.e. unit->data and unit->data_ref have been allocated and filled).
     int (*write_unit)(CodedBitstreamContext *ctx,
                       CodedBitstreamUnit *unit,
                       PutBitContext *pbc);
diff --git a/libavcodec/cbs_vp9.c b/libavcodec/cbs_vp9.c
index 42e4dcf5ac..bc074c4631 100644
--- a/libavcodec/cbs_vp9.c
+++ b/libavcodec/cbs_vp9.c
@@ -526,6 +526,7 @@  static int cbs_vp9_write_unit(CodedBitstreamContext *ctx,
                               PutBitContext *pbc)
 {
     VP9RawFrame *frame = unit->content;
+    size_t data_size, header_size;
     int err;
 
     err = cbs_vp9_write_frame(ctx, pbc, frame);
@@ -535,16 +536,28 @@  static int cbs_vp9_write_unit(CodedBitstreamContext *ctx,
     // Frame must be byte-aligned.
     av_assert0(put_bits_count(pbc) % 8 == 0);
 
+    data_size = header_size = put_bits_count(pbc) / 8;
+    unit->data_bit_padding = 0;
+    flush_put_bits(pbc);
+
     if (frame->data) {
-        if (frame->data_size > put_bits_left(pbc) / 8)
-            return AVERROR(ENOSPC);
+        if (frame->data_size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE
+                                       - header_size)
+            return AVERROR(ENOMEM);
 
-        flush_put_bits(pbc);
-        memcpy(put_bits_ptr(pbc), frame->data, frame->data_size);
-        skip_put_bytes(pbc, frame->data_size);
+        data_size += frame->data_size;
     }
 
-    return 0;
+    err = ff_cbs_alloc_unit_data(ctx, unit, data_size);
+    if (err < 0)
+        return err;
+
+    memcpy(unit->data, pbc->buf, header_size);
+
+    if (frame->data)
+        memcpy(unit->data + header_size, frame->data, frame->data_size);
+
+    return 1;
 }
 
 static int cbs_vp9_assemble_fragment(CodedBitstreamContext *ctx,