[FFmpeg-devel,2/2,v2] avcodec/cbs_vp9: keep track of reference frames

Submitted by James Almer on Oct. 26, 2018, 10:07 p.m.

Details

Message ID 20181026220717.3552-1-jamrial@gmail.com
State New
Headers show

Commit Message

James Almer Oct. 26, 2018, 10:07 p.m.
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavcodec/cbs_vp9.h                 | 13 +++++++++
 libavcodec/cbs_vp9_syntax_template.c | 40 +++++++++++++++++++++++++---
 2 files changed, 49 insertions(+), 4 deletions(-)

Comments

Mark Thompson Oct. 27, 2018, 7:28 p.m.
On 26/10/18 23:07, James Almer wrote:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavcodec/cbs_vp9.h                 | 13 +++++++++
>  libavcodec/cbs_vp9_syntax_template.c | 40 +++++++++++++++++++++++++---
>  2 files changed, 49 insertions(+), 4 deletions(-)
> 
> diff --git a/libavcodec/cbs_vp9.h b/libavcodec/cbs_vp9.h
> index 5b99c90c2e..7eee6d5e9e 100644
> --- a/libavcodec/cbs_vp9.h
> +++ b/libavcodec/cbs_vp9.h
> @@ -183,6 +183,13 @@ typedef struct VP9RawSuperframe {
>      VP9RawSuperframeIndex index;
>  } VP9RawSuperframe;
>  
> +typedef struct VP9ReferenceFrameState {
> +    int frame_width;    // RefFrameWidth
> +    int frame_height;   // RefFrameHeight
> +    int subsampling_x;  // RefSubsamplingX
> +    int subsampling_y;  // RefSubsamplingY
> +    int bit_depth;      // RefBitDepth
> +} VP9ReferenceFrameState;
>  
>  typedef struct CodedBitstreamVP9Context {
>      // Frame dimensions in 8x8 mode info blocks.
> @@ -192,6 +199,12 @@ typedef struct CodedBitstreamVP9Context {
>      uint16_t sb64_cols;
>      uint16_t sb64_rows;
>  
> +    int bit_depth;
> +    int frame_width;
> +    int frame_height;
> +
> +    VP9ReferenceFrameState ref[VP9_NUM_REF_FRAMES];
> +
>      // Write buffer.
>      uint8_t *write_buffer;
>      size_t write_buffer_size;
> diff --git a/libavcodec/cbs_vp9_syntax_template.c b/libavcodec/cbs_vp9_syntax_template.c
> index b4a7f65e85..cd5b83a4f5 100644
> --- a/libavcodec/cbs_vp9_syntax_template.c
> +++ b/libavcodec/cbs_vp9_syntax_template.c
> @@ -43,10 +43,14 @@ static int FUNC(frame_sync_code)(CodedBitstreamContext *ctx, RWContext *rw,
>  static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
>                                VP9RawFrameHeader *current, int profile)
>  {
> +    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
>      int err;
>  
> -    if (profile >= 2)
> +    if (profile >= 2) {
>          f(1, ten_or_twelve_bit);
> +        vp9->bit_depth = current->ten_or_twelve_bit ? 12 : 10;
> +    } else
> +        vp9->bit_depth = 8;
>  
>      f(3, color_space);
>  
> @@ -81,8 +85,11 @@ static int FUNC(frame_size)(CodedBitstreamContext *ctx, RWContext *rw,
>      f(16, frame_width_minus_1);
>      f(16, frame_height_minus_1);
>  
> -    vp9->mi_cols = (current->frame_width_minus_1  + 8) >> 3;
> -    vp9->mi_rows = (current->frame_height_minus_1 + 8) >> 3;
> +    vp9->frame_width  = current->frame_width_minus_1  + 1;
> +    vp9->frame_height = current->frame_height_minus_1 + 1;
> +
> +    vp9->mi_cols = (vp9->frame_width  + 7) >> 3;
> +    vp9->mi_rows = (vp9->frame_height + 7) >> 3;
>      vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
>      vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
>  
> @@ -107,12 +114,24 @@ static int FUNC(render_size)(CodedBitstreamContext *ctx, RWContext *rw,
>  static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
>                                        VP9RawFrameHeader *current)
>  {
> +    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
>      int err, i;
>  
>      for (i = 0; i < VP9_REFS_PER_FRAME; i++) {
>          fs(1, found_ref[i], 1, i);
> -        if (current->found_ref[i])
> +        if (current->found_ref[i]) {
> +            VP9ReferenceFrameState *ref =
> +                &vp9->ref[current->ref_frame_idx[i]];
> +
> +            vp9->frame_width  = ref->frame_width;
> +            vp9->frame_height = ref->frame_height;
> +
> +            vp9->mi_cols = (vp9->frame_width  + 7) >> 3;
> +            vp9->mi_rows = (vp9->frame_height + 7) >> 3;
> +            vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
> +            vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
>              break;
> +        }
>      }
>      if (i >= VP9_REFS_PER_FRAME)
>          CHECK(FUNC(frame_size)(ctx, rw, current));
> @@ -249,6 +268,7 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw,
>  static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
>                                       VP9RawFrameHeader *current)
>  {
> +    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
>      int profile, i;
>      int err;
>  
> @@ -339,6 +359,18 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
>  
>      f(16, header_size_in_bytes);
>  
> +    for (i = 0; i < VP9_NUM_REF_FRAMES; i++) {
> +        if (current->refresh_frame_flags & (1 << i)) {
> +            vp9->ref[i] = (VP9ReferenceFrameState) {
> +                .frame_width    = vp9->frame_width,
> +                .frame_height   = vp9->frame_height,
> +                .subsampling_x  = current->subsampling_x,
> +                .subsampling_y  = current->subsampling_y,
> +                .bit_depth      = vp9->bit_depth,

These three fields need to get read back somewhere like width/height are, otherwise you're losing track of them on the first inter frame.

> +            };
> +        }
> +    }

(I added:

    av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame:  size %dx%d  "
           "subsample %dx%d  bit_depth %d  tiles %dx%d.\n",
           vp9->frame_width, vp9->frame_height,
           current->subsampling_x, current->subsampling_y,
           vp9->bit_depth, 1 << current->tile_cols_log2,
           1 << current->tile_rows_log2);

like AV1 has here to test.)

> +
>      return 0;
>  }
>  
> 

Rest looks right.

Thanks,

- Mark
James Almer Oct. 27, 2018, 8:11 p.m.
On 10/27/2018 4:28 PM, Mark Thompson wrote:
> On 26/10/18 23:07, James Almer wrote:
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>  libavcodec/cbs_vp9.h                 | 13 +++++++++
>>  libavcodec/cbs_vp9_syntax_template.c | 40 +++++++++++++++++++++++++---
>>  2 files changed, 49 insertions(+), 4 deletions(-)
>>
>> diff --git a/libavcodec/cbs_vp9.h b/libavcodec/cbs_vp9.h
>> index 5b99c90c2e..7eee6d5e9e 100644
>> --- a/libavcodec/cbs_vp9.h
>> +++ b/libavcodec/cbs_vp9.h
>> @@ -183,6 +183,13 @@ typedef struct VP9RawSuperframe {
>>      VP9RawSuperframeIndex index;
>>  } VP9RawSuperframe;
>>  
>> +typedef struct VP9ReferenceFrameState {
>> +    int frame_width;    // RefFrameWidth
>> +    int frame_height;   // RefFrameHeight
>> +    int subsampling_x;  // RefSubsamplingX
>> +    int subsampling_y;  // RefSubsamplingY
>> +    int bit_depth;      // RefBitDepth
>> +} VP9ReferenceFrameState;
>>  
>>  typedef struct CodedBitstreamVP9Context {
>>      // Frame dimensions in 8x8 mode info blocks.
>> @@ -192,6 +199,12 @@ typedef struct CodedBitstreamVP9Context {
>>      uint16_t sb64_cols;
>>      uint16_t sb64_rows;
>>  
>> +    int bit_depth;
>> +    int frame_width;
>> +    int frame_height;
>> +
>> +    VP9ReferenceFrameState ref[VP9_NUM_REF_FRAMES];
>> +
>>      // Write buffer.
>>      uint8_t *write_buffer;
>>      size_t write_buffer_size;
>> diff --git a/libavcodec/cbs_vp9_syntax_template.c b/libavcodec/cbs_vp9_syntax_template.c
>> index b4a7f65e85..cd5b83a4f5 100644
>> --- a/libavcodec/cbs_vp9_syntax_template.c
>> +++ b/libavcodec/cbs_vp9_syntax_template.c
>> @@ -43,10 +43,14 @@ static int FUNC(frame_sync_code)(CodedBitstreamContext *ctx, RWContext *rw,
>>  static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
>>                                VP9RawFrameHeader *current, int profile)
>>  {
>> +    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
>>      int err;
>>  
>> -    if (profile >= 2)
>> +    if (profile >= 2) {
>>          f(1, ten_or_twelve_bit);
>> +        vp9->bit_depth = current->ten_or_twelve_bit ? 12 : 10;
>> +    } else
>> +        vp9->bit_depth = 8;
>>  
>>      f(3, color_space);
>>  
>> @@ -81,8 +85,11 @@ static int FUNC(frame_size)(CodedBitstreamContext *ctx, RWContext *rw,
>>      f(16, frame_width_minus_1);
>>      f(16, frame_height_minus_1);
>>  
>> -    vp9->mi_cols = (current->frame_width_minus_1  + 8) >> 3;
>> -    vp9->mi_rows = (current->frame_height_minus_1 + 8) >> 3;
>> +    vp9->frame_width  = current->frame_width_minus_1  + 1;
>> +    vp9->frame_height = current->frame_height_minus_1 + 1;
>> +
>> +    vp9->mi_cols = (vp9->frame_width  + 7) >> 3;
>> +    vp9->mi_rows = (vp9->frame_height + 7) >> 3;
>>      vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
>>      vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
>>  
>> @@ -107,12 +114,24 @@ static int FUNC(render_size)(CodedBitstreamContext *ctx, RWContext *rw,
>>  static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
>>                                        VP9RawFrameHeader *current)
>>  {
>> +    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
>>      int err, i;
>>  
>>      for (i = 0; i < VP9_REFS_PER_FRAME; i++) {
>>          fs(1, found_ref[i], 1, i);
>> -        if (current->found_ref[i])
>> +        if (current->found_ref[i]) {
>> +            VP9ReferenceFrameState *ref =
>> +                &vp9->ref[current->ref_frame_idx[i]];
>> +
>> +            vp9->frame_width  = ref->frame_width;
>> +            vp9->frame_height = ref->frame_height;
>> +
>> +            vp9->mi_cols = (vp9->frame_width  + 7) >> 3;
>> +            vp9->mi_rows = (vp9->frame_height + 7) >> 3;
>> +            vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
>> +            vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
>>              break;
>> +        }
>>      }
>>      if (i >= VP9_REFS_PER_FRAME)
>>          CHECK(FUNC(frame_size)(ctx, rw, current));
>> @@ -249,6 +268,7 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw,
>>  static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
>>                                       VP9RawFrameHeader *current)
>>  {
>> +    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
>>      int profile, i;
>>      int err;
>>  
>> @@ -339,6 +359,18 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
>>  
>>      f(16, header_size_in_bytes);
>>  
>> +    for (i = 0; i < VP9_NUM_REF_FRAMES; i++) {
>> +        if (current->refresh_frame_flags & (1 << i)) {
>> +            vp9->ref[i] = (VP9ReferenceFrameState) {
>> +                .frame_width    = vp9->frame_width,
>> +                .frame_height   = vp9->frame_height,
>> +                .subsampling_x  = current->subsampling_x,
>> +                .subsampling_y  = current->subsampling_y,
>> +                .bit_depth      = vp9->bit_depth,
> 
> These three fields need to get read back somewhere like width/height are, otherwise you're losing track of them on the first inter frame.

The spec only mentions them in the section about how to output a frame
at the end of the decoding process when show_existing_frame == 1, but I
guess adding them to CodedBitstreamVP9Context and reading them in the
found_ref loop from frame_size_with_refs would be ok?

AV1 doesn't read these anywhere either but it doesn't seem to have this
issue seeing that, unlike VP9, subsampling_* and bit_depth are explicit
in color_config() and always read regardless of path.

> 
>> +            };
>> +        }
>> +    }
> 
> (I added:
> 
>     av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame:  size %dx%d  "
>            "subsample %dx%d  bit_depth %d  tiles %dx%d.\n",
>            vp9->frame_width, vp9->frame_height,
>            current->subsampling_x, current->subsampling_y,
>            vp9->bit_depth, 1 << current->tile_cols_log2,
>            1 << current->tile_rows_log2);
> 
> like AV1 has here to test.)
> 
>> +
>>      return 0;
>>  }
>>  
>>
> 
> Rest looks right.
> 
> Thanks,
> 
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Patch hide | download patch | download mbox

diff --git a/libavcodec/cbs_vp9.h b/libavcodec/cbs_vp9.h
index 5b99c90c2e..7eee6d5e9e 100644
--- a/libavcodec/cbs_vp9.h
+++ b/libavcodec/cbs_vp9.h
@@ -183,6 +183,13 @@  typedef struct VP9RawSuperframe {
     VP9RawSuperframeIndex index;
 } VP9RawSuperframe;
 
+typedef struct VP9ReferenceFrameState {
+    int frame_width;    // RefFrameWidth
+    int frame_height;   // RefFrameHeight
+    int subsampling_x;  // RefSubsamplingX
+    int subsampling_y;  // RefSubsamplingY
+    int bit_depth;      // RefBitDepth
+} VP9ReferenceFrameState;
 
 typedef struct CodedBitstreamVP9Context {
     // Frame dimensions in 8x8 mode info blocks.
@@ -192,6 +199,12 @@  typedef struct CodedBitstreamVP9Context {
     uint16_t sb64_cols;
     uint16_t sb64_rows;
 
+    int bit_depth;
+    int frame_width;
+    int frame_height;
+
+    VP9ReferenceFrameState ref[VP9_NUM_REF_FRAMES];
+
     // Write buffer.
     uint8_t *write_buffer;
     size_t write_buffer_size;
diff --git a/libavcodec/cbs_vp9_syntax_template.c b/libavcodec/cbs_vp9_syntax_template.c
index b4a7f65e85..cd5b83a4f5 100644
--- a/libavcodec/cbs_vp9_syntax_template.c
+++ b/libavcodec/cbs_vp9_syntax_template.c
@@ -43,10 +43,14 @@  static int FUNC(frame_sync_code)(CodedBitstreamContext *ctx, RWContext *rw,
 static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
                               VP9RawFrameHeader *current, int profile)
 {
+    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
     int err;
 
-    if (profile >= 2)
+    if (profile >= 2) {
         f(1, ten_or_twelve_bit);
+        vp9->bit_depth = current->ten_or_twelve_bit ? 12 : 10;
+    } else
+        vp9->bit_depth = 8;
 
     f(3, color_space);
 
@@ -81,8 +85,11 @@  static int FUNC(frame_size)(CodedBitstreamContext *ctx, RWContext *rw,
     f(16, frame_width_minus_1);
     f(16, frame_height_minus_1);
 
-    vp9->mi_cols = (current->frame_width_minus_1  + 8) >> 3;
-    vp9->mi_rows = (current->frame_height_minus_1 + 8) >> 3;
+    vp9->frame_width  = current->frame_width_minus_1  + 1;
+    vp9->frame_height = current->frame_height_minus_1 + 1;
+
+    vp9->mi_cols = (vp9->frame_width  + 7) >> 3;
+    vp9->mi_rows = (vp9->frame_height + 7) >> 3;
     vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
     vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
 
@@ -107,12 +114,24 @@  static int FUNC(render_size)(CodedBitstreamContext *ctx, RWContext *rw,
 static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
                                       VP9RawFrameHeader *current)
 {
+    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
     int err, i;
 
     for (i = 0; i < VP9_REFS_PER_FRAME; i++) {
         fs(1, found_ref[i], 1, i);
-        if (current->found_ref[i])
+        if (current->found_ref[i]) {
+            VP9ReferenceFrameState *ref =
+                &vp9->ref[current->ref_frame_idx[i]];
+
+            vp9->frame_width  = ref->frame_width;
+            vp9->frame_height = ref->frame_height;
+
+            vp9->mi_cols = (vp9->frame_width  + 7) >> 3;
+            vp9->mi_rows = (vp9->frame_height + 7) >> 3;
+            vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
+            vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
             break;
+        }
     }
     if (i >= VP9_REFS_PER_FRAME)
         CHECK(FUNC(frame_size)(ctx, rw, current));
@@ -249,6 +268,7 @@  static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw,
 static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
                                      VP9RawFrameHeader *current)
 {
+    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
     int profile, i;
     int err;
 
@@ -339,6 +359,18 @@  static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
 
     f(16, header_size_in_bytes);
 
+    for (i = 0; i < VP9_NUM_REF_FRAMES; i++) {
+        if (current->refresh_frame_flags & (1 << i)) {
+            vp9->ref[i] = (VP9ReferenceFrameState) {
+                .frame_width    = vp9->frame_width,
+                .frame_height   = vp9->frame_height,
+                .subsampling_x  = current->subsampling_x,
+                .subsampling_y  = current->subsampling_y,
+                .bit_depth      = vp9->bit_depth,
+            };
+        }
+    }
+
     return 0;
 }