[FFmpeg-devel] avcodec/cbs_av1: add support for Scalability Metadata

Submitted by James Almer on April 14, 2019, 10:04 p.m.

Details

Message ID 20190414220412.8328-1-jamrial@gmail.com
State New
Headers show

Commit Message

James Almer April 14, 2019, 10:04 p.m.
Signed-off-by: James Almer <jamrial@gmail.com>
---
This will make the AV1RawObu struct weigh ~3 KB instead of ~1 KB.

 libavcodec/av1.h                     | 33 ++++++++++++++++++++
 libavcodec/cbs_av1.h                 | 15 +++++++++-
 libavcodec/cbs_av1_syntax_template.c | 45 ++++++++++++++++++++++++++--
 3 files changed, 90 insertions(+), 3 deletions(-)

Comments

Mark Thompson April 16, 2019, 10:05 p.m.
On 14/04/2019 23:04, James Almer wrote:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> This will make the AV1RawObu struct weigh ~3 KB instead of ~1 KB.
> 
>  libavcodec/av1.h                     | 33 ++++++++++++++++++++
>  libavcodec/cbs_av1.h                 | 15 +++++++++-
>  libavcodec/cbs_av1_syntax_template.c | 45 ++++++++++++++++++++++++++--
>  3 files changed, 90 insertions(+), 3 deletions(-)
> 
> diff --git a/libavcodec/av1.h b/libavcodec/av1.h
> index f2ec39c86b..6c0e32485b 100644
> --- a/libavcodec/av1.h
> +++ b/libavcodec/av1.h
> @@ -127,4 +127,37 @@ enum {
>      AV1_CSP_COLOCATED = 2, // -> AVCHROMA_LOC_TOPLEFT.
>  };
>  
> +// Scalability modes (section 6.7.5)
> +enum {
> +    AV1_SCALABILITY_L1T2 = 0,
> +    AV1_SCALABILITY_L1T3 = 1,
> +    AV1_SCALABILITY_L2T1 = 2,
> +    AV1_SCALABILITY_L2T2 = 3,
> +    AV1_SCALABILITY_L2T3 = 4,
> +    AV1_SCALABILITY_S2T1 = 5,
> +    AV1_SCALABILITY_S2T2 = 6,
> +    AV1_SCALABILITY_S2T3 = 7,
> +    AV1_SCALABILITY_L2T1h = 8,
> +    AV1_SCALABILITY_L2T2h = 9,
> +    AV1_SCALABILITY_L2T3h = 10,
> +    AV1_SCALABILITY_S2T1h = 11,
> +    AV1_SCALABILITY_S2T2h = 12,
> +    AV1_SCALABILITY_S2T3h = 13,
> +    AV1_SCALABILITY_SS = 14,
> +    AV1_SCALABILITY_L3T1 = 15,
> +    AV1_SCALABILITY_L3T2 = 16,
> +    AV1_SCALABILITY_L3T3 = 17,
> +    AV1_SCALABILITY_S3T1 = 18,
> +    AV1_SCALABILITY_S3T2 = 19,
> +    AV1_SCALABILITY_S3T3 = 20,
> +    AV1_SCALABILITY_L3T2_KEY = 21,
> +    AV1_SCALABILITY_L3T3_KEY = 22,
> +    AV1_SCALABILITY_L4T5_KEY = 23,
> +    AV1_SCALABILITY_L4T7_KEY = 24,
> +    AV1_SCALABILITY_L3T2_KEY_SHIFT = 25,
> +    AV1_SCALABILITY_L3T3_KEY_SHIFT = 26,
> +    AV1_SCALABILITY_L4T5_KEY_SHIFT = 27,
> +    AV1_SCALABILITY_L4T7_KEY_SHIFT = 28,
> +};
> +
>  #endif /* AVCODEC_AV1_H */
> diff --git a/libavcodec/cbs_av1.h b/libavcodec/cbs_av1.h
> index e799964b72..1fb668ada4 100644
> --- a/libavcodec/cbs_av1.h
> +++ b/libavcodec/cbs_av1.h
> @@ -325,7 +325,20 @@ typedef struct AV1RawMetadataHDRMDCV {
>  
>  typedef struct AV1RawMetadataScalability {
>      uint8_t scalability_mode_idc;
> -    // TODO: more stuff.
> +    uint8_t spatial_layers_cnt_minus_1;
> +    uint8_t spatial_layer_dimensions_present_flag;
> +    uint8_t spatial_layer_description_present_flag;
> +    uint8_t temporal_group_description_present_flag;
> +    uint8_t scalability_structure_reserved_3bits;
> +    uint16_t spatial_layer_max_width[4];
> +    uint16_t spatial_layer_max_height[4];
> +    uint8_t spatial_layer_ref_id[4];
> +    uint8_t temporal_group_size;
> +    uint8_t temporal_group_temporal_id[255];
> +    uint8_t temporal_group_temporal_switching_up_point_flag[255];
> +    uint8_t temporal_group_spatial_switching_up_point_flag[255];
> +    uint8_t temporal_group_ref_cnt[255];
> +    uint8_t temporal_group_ref_pic_diff[255][7];
>  } AV1RawMetadataScalability;
>  
>  typedef struct AV1RawMetadataITUTT35 {
> diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c
> index 0e019aa113..ef48173470 100644
> --- a/libavcodec/cbs_av1_syntax_template.c
> +++ b/libavcodec/cbs_av1_syntax_template.c
> @@ -1653,12 +1653,53 @@ static int FUNC(metadata_hdr_mdcv)(CodedBitstreamContext *ctx, RWContext *rw,
>      return 0;
>  }
>  
> +static int FUNC(scalability_structure)(CodedBitstreamContext *ctx, RWContext *rw,
> +                                       AV1RawMetadataScalability *current)
> +{
> +    int err, i, j;
> +
> +    fb(2, spatial_layers_cnt_minus_1);
> +    flag(spatial_layer_dimensions_present_flag);
> +    flag(spatial_layer_description_present_flag);
> +    flag(temporal_group_description_present_flag);
> +    fc(3, scalability_structure_reserved_3bits, 0, 0);
> +    if (current->spatial_layer_dimensions_present_flag) {
> +        for (i = 0; i <= current->spatial_layers_cnt_minus_1; i++) {
> +            fbs(16, spatial_layer_max_width[i], 1, i);
> +            fbs(16, spatial_layer_max_height[i], 1, i);

Can we verify against the "must not be larger than max_frame_*_minus_1 + 1" constraint here?  (I think a sequence header should be available.)

> +        }
> +    }
> +    if (current->spatial_layer_description_present_flag) {
> +        for (i = 0; i <= current->spatial_layers_cnt_minus_1; i++)
> +            fbs(8, spatial_layer_ref_id[i], 1, i);
> +    }
> +    if (current->temporal_group_description_present_flag) {
> +        fb(8, temporal_group_size);
> +        for (i = 0; i < current->temporal_group_size; i++) {
> +            fbs(3, temporal_group_temporal_id[i], 1, i);
> +            flags(temporal_group_temporal_switching_up_point_flag[i], 1, i);
> +            flags(temporal_group_spatial_switching_up_point_flag[i], 1, i);
> +            fbs(3, temporal_group_ref_cnt[i], 1, i);
> +            for (j = 0; j < current->temporal_group_ref_cnt[i]; j++) {
> +                fbs(8, temporal_group_ref_pic_diff[i][j], 2, i, j);
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int FUNC(metadata_scalability)(CodedBitstreamContext *ctx, RWContext *rw,
>                                        AV1RawMetadataScalability *current)
>  {
> -    // TODO: scalability metadata.
> +    int err;
>  
> -    return AVERROR_PATCHWELCOME;
> +    fb(8, scalability_mode_idc);
> +
> +    if (current->scalability_mode_idc == AV1_SCALABILITY_SS)
> +        CHECK(FUNC(scalability_structure)(ctx, rw, current));
> +
> +    return 0;
>  }
>  
>  static int FUNC(metadata_itut_t35)(CodedBitstreamContext *ctx, RWContext *rw,
> 

Looks fine.

Thanks,

- Mark
James Almer April 16, 2019, 10:26 p.m.
On 4/16/2019 7:05 PM, Mark Thompson wrote:
> On 14/04/2019 23:04, James Almer wrote:
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>> This will make the AV1RawObu struct weigh ~3 KB instead of ~1 KB.
>>
>>  libavcodec/av1.h                     | 33 ++++++++++++++++++++
>>  libavcodec/cbs_av1.h                 | 15 +++++++++-
>>  libavcodec/cbs_av1_syntax_template.c | 45 ++++++++++++++++++++++++++--
>>  3 files changed, 90 insertions(+), 3 deletions(-)
>>
>> diff --git a/libavcodec/av1.h b/libavcodec/av1.h
>> index f2ec39c86b..6c0e32485b 100644
>> --- a/libavcodec/av1.h
>> +++ b/libavcodec/av1.h
>> @@ -127,4 +127,37 @@ enum {
>>      AV1_CSP_COLOCATED = 2, // -> AVCHROMA_LOC_TOPLEFT.
>>  };
>>  
>> +// Scalability modes (section 6.7.5)
>> +enum {
>> +    AV1_SCALABILITY_L1T2 = 0,
>> +    AV1_SCALABILITY_L1T3 = 1,
>> +    AV1_SCALABILITY_L2T1 = 2,
>> +    AV1_SCALABILITY_L2T2 = 3,
>> +    AV1_SCALABILITY_L2T3 = 4,
>> +    AV1_SCALABILITY_S2T1 = 5,
>> +    AV1_SCALABILITY_S2T2 = 6,
>> +    AV1_SCALABILITY_S2T3 = 7,
>> +    AV1_SCALABILITY_L2T1h = 8,
>> +    AV1_SCALABILITY_L2T2h = 9,
>> +    AV1_SCALABILITY_L2T3h = 10,
>> +    AV1_SCALABILITY_S2T1h = 11,
>> +    AV1_SCALABILITY_S2T2h = 12,
>> +    AV1_SCALABILITY_S2T3h = 13,
>> +    AV1_SCALABILITY_SS = 14,
>> +    AV1_SCALABILITY_L3T1 = 15,
>> +    AV1_SCALABILITY_L3T2 = 16,
>> +    AV1_SCALABILITY_L3T3 = 17,
>> +    AV1_SCALABILITY_S3T1 = 18,
>> +    AV1_SCALABILITY_S3T2 = 19,
>> +    AV1_SCALABILITY_S3T3 = 20,
>> +    AV1_SCALABILITY_L3T2_KEY = 21,
>> +    AV1_SCALABILITY_L3T3_KEY = 22,
>> +    AV1_SCALABILITY_L4T5_KEY = 23,
>> +    AV1_SCALABILITY_L4T7_KEY = 24,
>> +    AV1_SCALABILITY_L3T2_KEY_SHIFT = 25,
>> +    AV1_SCALABILITY_L3T3_KEY_SHIFT = 26,
>> +    AV1_SCALABILITY_L4T5_KEY_SHIFT = 27,
>> +    AV1_SCALABILITY_L4T7_KEY_SHIFT = 28,
>> +};
>> +
>>  #endif /* AVCODEC_AV1_H */
>> diff --git a/libavcodec/cbs_av1.h b/libavcodec/cbs_av1.h
>> index e799964b72..1fb668ada4 100644
>> --- a/libavcodec/cbs_av1.h
>> +++ b/libavcodec/cbs_av1.h
>> @@ -325,7 +325,20 @@ typedef struct AV1RawMetadataHDRMDCV {
>>  
>>  typedef struct AV1RawMetadataScalability {
>>      uint8_t scalability_mode_idc;
>> -    // TODO: more stuff.
>> +    uint8_t spatial_layers_cnt_minus_1;
>> +    uint8_t spatial_layer_dimensions_present_flag;
>> +    uint8_t spatial_layer_description_present_flag;
>> +    uint8_t temporal_group_description_present_flag;
>> +    uint8_t scalability_structure_reserved_3bits;
>> +    uint16_t spatial_layer_max_width[4];
>> +    uint16_t spatial_layer_max_height[4];
>> +    uint8_t spatial_layer_ref_id[4];
>> +    uint8_t temporal_group_size;
>> +    uint8_t temporal_group_temporal_id[255];
>> +    uint8_t temporal_group_temporal_switching_up_point_flag[255];
>> +    uint8_t temporal_group_spatial_switching_up_point_flag[255];
>> +    uint8_t temporal_group_ref_cnt[255];
>> +    uint8_t temporal_group_ref_pic_diff[255][7];
>>  } AV1RawMetadataScalability;
>>  
>>  typedef struct AV1RawMetadataITUTT35 {
>> diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c
>> index 0e019aa113..ef48173470 100644
>> --- a/libavcodec/cbs_av1_syntax_template.c
>> +++ b/libavcodec/cbs_av1_syntax_template.c
>> @@ -1653,12 +1653,53 @@ static int FUNC(metadata_hdr_mdcv)(CodedBitstreamContext *ctx, RWContext *rw,
>>      return 0;
>>  }
>>  
>> +static int FUNC(scalability_structure)(CodedBitstreamContext *ctx, RWContext *rw,
>> +                                       AV1RawMetadataScalability *current)
>> +{
>> +    int err, i, j;
>> +
>> +    fb(2, spatial_layers_cnt_minus_1);
>> +    flag(spatial_layer_dimensions_present_flag);
>> +    flag(spatial_layer_description_present_flag);
>> +    flag(temporal_group_description_present_flag);
>> +    fc(3, scalability_structure_reserved_3bits, 0, 0);
>> +    if (current->spatial_layer_dimensions_present_flag) {
>> +        for (i = 0; i <= current->spatial_layers_cnt_minus_1; i++) {
>> +            fbs(16, spatial_layer_max_width[i], 1, i);
>> +            fbs(16, spatial_layer_max_height[i], 1, i);
> 
> Can we verify against the "must not be larger than max_frame_*_minus_1 + 1" constraint here?  (I think a sequence header should be available.)

Ah, good catch. Added a check for that.
> 
>> +        }
>> +    }
>> +    if (current->spatial_layer_description_present_flag) {
>> +        for (i = 0; i <= current->spatial_layers_cnt_minus_1; i++)
>> +            fbs(8, spatial_layer_ref_id[i], 1, i);
>> +    }
>> +    if (current->temporal_group_description_present_flag) {
>> +        fb(8, temporal_group_size);
>> +        for (i = 0; i < current->temporal_group_size; i++) {
>> +            fbs(3, temporal_group_temporal_id[i], 1, i);
>> +            flags(temporal_group_temporal_switching_up_point_flag[i], 1, i);
>> +            flags(temporal_group_spatial_switching_up_point_flag[i], 1, i);
>> +            fbs(3, temporal_group_ref_cnt[i], 1, i);
>> +            for (j = 0; j < current->temporal_group_ref_cnt[i]; j++) {
>> +                fbs(8, temporal_group_ref_pic_diff[i][j], 2, i, j);
>> +            }
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  static int FUNC(metadata_scalability)(CodedBitstreamContext *ctx, RWContext *rw,
>>                                        AV1RawMetadataScalability *current)
>>  {
>> -    // TODO: scalability metadata.
>> +    int err;
>>  
>> -    return AVERROR_PATCHWELCOME;
>> +    fb(8, scalability_mode_idc);
>> +
>> +    if (current->scalability_mode_idc == AV1_SCALABILITY_SS)
>> +        CHECK(FUNC(scalability_structure)(ctx, rw, current));
>> +
>> +    return 0;
>>  }
>>  
>>  static int FUNC(metadata_itut_t35)(CodedBitstreamContext *ctx, RWContext *rw,
>>
> 
> Looks fine.
> 
> Thanks,
> 
> - Mark

Pushed, thanks!

Patch hide | download patch | download mbox

diff --git a/libavcodec/av1.h b/libavcodec/av1.h
index f2ec39c86b..6c0e32485b 100644
--- a/libavcodec/av1.h
+++ b/libavcodec/av1.h
@@ -127,4 +127,37 @@  enum {
     AV1_CSP_COLOCATED = 2, // -> AVCHROMA_LOC_TOPLEFT.
 };
 
+// Scalability modes (section 6.7.5)
+enum {
+    AV1_SCALABILITY_L1T2 = 0,
+    AV1_SCALABILITY_L1T3 = 1,
+    AV1_SCALABILITY_L2T1 = 2,
+    AV1_SCALABILITY_L2T2 = 3,
+    AV1_SCALABILITY_L2T3 = 4,
+    AV1_SCALABILITY_S2T1 = 5,
+    AV1_SCALABILITY_S2T2 = 6,
+    AV1_SCALABILITY_S2T3 = 7,
+    AV1_SCALABILITY_L2T1h = 8,
+    AV1_SCALABILITY_L2T2h = 9,
+    AV1_SCALABILITY_L2T3h = 10,
+    AV1_SCALABILITY_S2T1h = 11,
+    AV1_SCALABILITY_S2T2h = 12,
+    AV1_SCALABILITY_S2T3h = 13,
+    AV1_SCALABILITY_SS = 14,
+    AV1_SCALABILITY_L3T1 = 15,
+    AV1_SCALABILITY_L3T2 = 16,
+    AV1_SCALABILITY_L3T3 = 17,
+    AV1_SCALABILITY_S3T1 = 18,
+    AV1_SCALABILITY_S3T2 = 19,
+    AV1_SCALABILITY_S3T3 = 20,
+    AV1_SCALABILITY_L3T2_KEY = 21,
+    AV1_SCALABILITY_L3T3_KEY = 22,
+    AV1_SCALABILITY_L4T5_KEY = 23,
+    AV1_SCALABILITY_L4T7_KEY = 24,
+    AV1_SCALABILITY_L3T2_KEY_SHIFT = 25,
+    AV1_SCALABILITY_L3T3_KEY_SHIFT = 26,
+    AV1_SCALABILITY_L4T5_KEY_SHIFT = 27,
+    AV1_SCALABILITY_L4T7_KEY_SHIFT = 28,
+};
+
 #endif /* AVCODEC_AV1_H */
diff --git a/libavcodec/cbs_av1.h b/libavcodec/cbs_av1.h
index e799964b72..1fb668ada4 100644
--- a/libavcodec/cbs_av1.h
+++ b/libavcodec/cbs_av1.h
@@ -325,7 +325,20 @@  typedef struct AV1RawMetadataHDRMDCV {
 
 typedef struct AV1RawMetadataScalability {
     uint8_t scalability_mode_idc;
-    // TODO: more stuff.
+    uint8_t spatial_layers_cnt_minus_1;
+    uint8_t spatial_layer_dimensions_present_flag;
+    uint8_t spatial_layer_description_present_flag;
+    uint8_t temporal_group_description_present_flag;
+    uint8_t scalability_structure_reserved_3bits;
+    uint16_t spatial_layer_max_width[4];
+    uint16_t spatial_layer_max_height[4];
+    uint8_t spatial_layer_ref_id[4];
+    uint8_t temporal_group_size;
+    uint8_t temporal_group_temporal_id[255];
+    uint8_t temporal_group_temporal_switching_up_point_flag[255];
+    uint8_t temporal_group_spatial_switching_up_point_flag[255];
+    uint8_t temporal_group_ref_cnt[255];
+    uint8_t temporal_group_ref_pic_diff[255][7];
 } AV1RawMetadataScalability;
 
 typedef struct AV1RawMetadataITUTT35 {
diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c
index 0e019aa113..ef48173470 100644
--- a/libavcodec/cbs_av1_syntax_template.c
+++ b/libavcodec/cbs_av1_syntax_template.c
@@ -1653,12 +1653,53 @@  static int FUNC(metadata_hdr_mdcv)(CodedBitstreamContext *ctx, RWContext *rw,
     return 0;
 }
 
+static int FUNC(scalability_structure)(CodedBitstreamContext *ctx, RWContext *rw,
+                                       AV1RawMetadataScalability *current)
+{
+    int err, i, j;
+
+    fb(2, spatial_layers_cnt_minus_1);
+    flag(spatial_layer_dimensions_present_flag);
+    flag(spatial_layer_description_present_flag);
+    flag(temporal_group_description_present_flag);
+    fc(3, scalability_structure_reserved_3bits, 0, 0);
+    if (current->spatial_layer_dimensions_present_flag) {
+        for (i = 0; i <= current->spatial_layers_cnt_minus_1; i++) {
+            fbs(16, spatial_layer_max_width[i], 1, i);
+            fbs(16, spatial_layer_max_height[i], 1, i);
+        }
+    }
+    if (current->spatial_layer_description_present_flag) {
+        for (i = 0; i <= current->spatial_layers_cnt_minus_1; i++)
+            fbs(8, spatial_layer_ref_id[i], 1, i);
+    }
+    if (current->temporal_group_description_present_flag) {
+        fb(8, temporal_group_size);
+        for (i = 0; i < current->temporal_group_size; i++) {
+            fbs(3, temporal_group_temporal_id[i], 1, i);
+            flags(temporal_group_temporal_switching_up_point_flag[i], 1, i);
+            flags(temporal_group_spatial_switching_up_point_flag[i], 1, i);
+            fbs(3, temporal_group_ref_cnt[i], 1, i);
+            for (j = 0; j < current->temporal_group_ref_cnt[i]; j++) {
+                fbs(8, temporal_group_ref_pic_diff[i][j], 2, i, j);
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int FUNC(metadata_scalability)(CodedBitstreamContext *ctx, RWContext *rw,
                                       AV1RawMetadataScalability *current)
 {
-    // TODO: scalability metadata.
+    int err;
 
-    return AVERROR_PATCHWELCOME;
+    fb(8, scalability_mode_idc);
+
+    if (current->scalability_mode_idc == AV1_SCALABILITY_SS)
+        CHECK(FUNC(scalability_structure)(ctx, rw, current));
+
+    return 0;
 }
 
 static int FUNC(metadata_itut_t35)(CodedBitstreamContext *ctx, RWContext *rw,