[FFmpeg-devel] avformat/av1: update ff_isom_write_av1c() to the latest revision of the spec

Submitted by James Almer on Aug. 14, 2018, 6:09 p.m.

Details

Message ID 20180814180956.9176-1-jamrial@gmail.com
State Superseded
Headers show

Commit Message

James Almer Aug. 14, 2018, 6:09 p.m.
This will get ISOBMFF and Matroska up to date with the revised AV1 Codec
Configuration Box spec.
For now keep propagating raw OBUs as extradata until all libavcodec modules
are adapted to handle AV1CodecConfigurationRecordv1 formatted extradata.

Signed-off-by: James Almer <jamrial@gmail.com>
---
"All libavcodec modules" means basically the upcoming AV1 decoder and CBS
implementation. I prefer to wait until those are posted/pushed in order to
better coordinate how to do this.

 libavformat/av1.c         | 242 ++++++++++++++++++++++++++++++++++++++
 libavformat/matroskadec.c |   4 +
 libavformat/mov.c         |  11 +-
 libavformat/movenc.c      |   1 -
 4 files changed, 254 insertions(+), 4 deletions(-)

Comments

Thomas Daede Aug. 14, 2018, 9:28 p.m.
On 08/14/2018 11:09 AM, James Almer wrote:
> This will get ISOBMFF and Matroska up to date with the revised AV1 Codec
> Configuration Box spec.
> For now keep propagating raw OBUs as extradata until all libavcodec modules
> are adapted to handle AV1CodecConfigurationRecordv1 formatted extradata.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>

I tested this patch and it works with my reader that I independently
updated to match the spec.

Patch hide | download patch | download mbox

diff --git a/libavformat/av1.c b/libavformat/av1.c
index db7f281fc7..670be9cd19 100644
--- a/libavformat/av1.c
+++ b/libavformat/av1.c
@@ -22,6 +22,8 @@ 
 #include "libavutil/mem.h"
 #include "libavcodec/av1.h"
 #include "libavcodec/av1_parse.h"
+#include "libavcodec/profiles.h"
+#include "libavcodec/put_bits.h"
 #include "av1.h"
 #include "avio.h"
 
@@ -73,9 +75,228 @@  int ff_av1_filter_obus_buf(const uint8_t *buf, uint8_t **out, int *size)
     return ret;
 }
 
+typedef struct AV1SequenceParameters { /* sequence header fields that are copied into the av1C header */
+    uint8_t seq_profile;            /* 3-bit profile read at the start of the sequence header */
+    uint8_t seq_level_idx_0;        /* 5-bit level index of operating point 0 */
+    uint8_t seq_tier_0;             /* tier of operating point 0; 0 when not coded */
+    uint8_t high_bitdepth;          /* 1-bit flag read first in the color config */
+    uint8_t twelve_bit;             /* only read for the Professional profile with high_bitdepth set, else 0 */
+    uint8_t monochrome;             /* forced to 0 for the High profile, otherwise read from the stream */
+    uint8_t chroma_subsampling_x;   /* 1-bit horizontal chroma subsampling flag */
+    uint8_t chroma_subsampling_y;   /* 1-bit vertical chroma subsampling flag */
+    uint8_t chroma_sample_position; /* 2-bit value, read only when both subsampling flags are set */
+} AV1SequenceParameters;
+
+/**
+ * Skip over one uvlc()-coded value.
+ *
+ * Consumes zero bits up to and including the terminating one bit, then skips
+ * as many additional bits as there were leading zeros. When 32 or more
+ * leading zeros are seen, nothing further is skipped.
+ *
+ * @param gb bit reader positioned at the start of the coded value
+ */
+static inline void uvlc(GetBitContext *gb)
+{
+    int leading_zeros = 0;
+
+    /* Bound the scan by the remaining input: on a truncated or all-zero
+     * buffer an unconditional loop would never see a terminating one bit. */
+    while (get_bits_left(gb) > 0) {
+        if (get_bits1(gb))
+            break;
+        leading_zeros++;
+    }
+
+    if (leading_zeros >= 32)
+        return;
+
+    skip_bits_long(gb, leading_zeros);
+}
+
+/**
+ * Parse the color config part of a sequence header.
+ *
+ * Fills high_bitdepth, twelve_bit, monochrome, chroma_subsampling_x,
+ * chroma_subsampling_y and chroma_sample_position in seq_params, and
+ * consumes the remaining color config bits without storing them.
+ * seq_params->seq_profile must already be set by the caller.
+ *
+ * @param seq_params sequence parameters to update
+ * @param gb         bit reader positioned at the start of the color config
+ * @return 0 (this function currently has no failure path)
+ */
+static int parse_color_config(AV1SequenceParameters *seq_params, GetBitContext *gb)
+{
+    int color_primaries, transfer_characteristics, matrix_coefficients;
+
+    /* chroma_sample_position is only coded for 4:2:0 content. Give it a
+     * defined value up front so the other paths do not leave the field
+     * indeterminate for the caller that writes it into the av1C header. */
+    seq_params->chroma_sample_position = 0;
+
+    seq_params->high_bitdepth = get_bits1(gb);
+    if (seq_params->seq_profile == FF_PROFILE_AV1_PROFESSIONAL && seq_params->high_bitdepth)
+        seq_params->twelve_bit = get_bits1(gb);
+    else
+        seq_params->twelve_bit = 0;
+
+    if (seq_params->seq_profile == FF_PROFILE_AV1_HIGH)
+        seq_params->monochrome = 0;
+    else
+        seq_params->monochrome = get_bits1(gb);
+
+    if (get_bits1(gb)) { // color_description_present_flag
+        color_primaries          = get_bits(gb, 8);
+        transfer_characteristics = get_bits(gb, 8);
+        matrix_coefficients      = get_bits(gb, 8);
+    } else {
+        color_primaries          = AVCOL_PRI_UNSPECIFIED;
+        transfer_characteristics = AVCOL_TRC_UNSPECIFIED;
+        matrix_coefficients      = AVCOL_SPC_UNSPECIFIED;
+    }
+
+    if (seq_params->monochrome) {
+        skip_bits1(gb); // color_range
+        seq_params->chroma_subsampling_x = 1;
+        seq_params->chroma_subsampling_y = 1;
+        /* separate_uv_delta_q is not read on this path */
+        return 0;
+    } else if (color_primaries          == AVCOL_PRI_BT709 &&
+               transfer_characteristics == AVCOL_TRC_IEC61966_2_1 &&
+               matrix_coefficients      == AVCOL_SPC_RGB) {
+        /* No color_range or subsampling bits are read for this combination */
+        seq_params->chroma_subsampling_x = 0;
+        seq_params->chroma_subsampling_y = 0;
+    } else {
+        skip_bits1(gb); // color_range
+
+        if (seq_params->seq_profile == FF_PROFILE_AV1_MAIN) {
+            seq_params->chroma_subsampling_x = 1;
+            seq_params->chroma_subsampling_y = 1;
+        } else if (seq_params->seq_profile == FF_PROFILE_AV1_HIGH) {
+            seq_params->chroma_subsampling_x = 0;
+            seq_params->chroma_subsampling_y = 0;
+        } else {
+            if (seq_params->twelve_bit) {
+                seq_params->chroma_subsampling_x = get_bits1(gb);
+                if (seq_params->chroma_subsampling_x)
+                    seq_params->chroma_subsampling_y = get_bits1(gb);
+                else
+                    seq_params->chroma_subsampling_y = 0;
+            } else {
+                seq_params->chroma_subsampling_x = 1;
+                seq_params->chroma_subsampling_y = 0;
+            }
+        }
+        if (seq_params->chroma_subsampling_x && seq_params->chroma_subsampling_y)
+            seq_params->chroma_sample_position = get_bits(gb, 2);
+    }
+
+    skip_bits1(gb); // separate_uv_delta_q
+
+    return 0;
+}
+
+/**
+ * Parse sequence header data and collect the fields needed for the av1C
+ * header: profile, level and tier of operating point 0, and the color
+ * configuration. All other syntax elements are read only to advance the
+ * bit reader.
+ *
+ * @param seq_params receives the parsed values
+ * @param buf        data to parse, starting at the seq_profile bits
+ * @param size       number of bytes available in buf
+ * @return 0 on success, a negative AVERROR code if the bit reader cannot be
+ *         initialized, the color config fails to parse, or more bits were
+ *         consumed than the buffer holds
+ */
+static int parse_sequence_header(AV1SequenceParameters *seq_params, const uint8_t *buf, int size)
+{
+    GetBitContext gb;
+    int reduced_still_picture_header;
+    int frame_width_bits_minus_1, frame_height_bits_minus_1;
+
+    int ret = init_get_bits8(&gb, buf, size);
+    if (ret < 0)
+        return ret;
+
+    seq_params->seq_profile = get_bits(&gb, 3);
+
+    skip_bits1(&gb); // still_picture
+    reduced_still_picture_header = get_bits1(&gb);
+
+    if (reduced_still_picture_header) {
+        seq_params->seq_level_idx_0 = get_bits(&gb, 5);
+        seq_params->seq_tier_0 = 0;
+    } else {
+        int initial_display_delay_present_flag, operating_points_cnt_minus_1;
+        int decoder_model_info_present_flag = 0;
+        /* Only meaningful when decoder_model_info_present_flag is set */
+        int buffer_delay_length_minus_1 = 0;
+
+        if (get_bits1(&gb)) { // timing_info_present_flag
+            skip_bits_long(&gb, 32); // num_units_in_display_tick
+            skip_bits_long(&gb, 32); // time_scale
+
+            if (get_bits1(&gb)) // equal_picture_interval
+                uvlc(&gb); // num_ticks_per_picture_minus_1
+
+            decoder_model_info_present_flag = get_bits1(&gb);
+            if (decoder_model_info_present_flag) {
+                buffer_delay_length_minus_1 = get_bits(&gb, 5);
+                skip_bits_long(&gb, 32); // num_units_in_decoding_tick
+                skip_bits(&gb, 10); // buffer_removal_time_length_minus_1 (5)
+                                    // frame_presentation_time_length_minus_1 (5)
+            }
+        }
+
+        initial_display_delay_present_flag = get_bits1(&gb);
+
+        operating_points_cnt_minus_1 = get_bits(&gb, 5);
+        for (int i = 0; i <= operating_points_cnt_minus_1; i++) {
+            int seq_level_idx, seq_tier;
+
+            skip_bits(&gb, 12); // operating_point_idc
+            seq_level_idx = get_bits(&gb, 5);
+
+            if (seq_level_idx > 7)
+                seq_tier = get_bits1(&gb);
+            else
+                seq_tier = 0;
+
+            if (decoder_model_info_present_flag) {
+                if (get_bits1(&gb)) { // decoder_model_present_for_this_op
+                    skip_bits_long(&gb, buffer_delay_length_minus_1 + 1); // decoder_buffer_delay
+                    skip_bits_long(&gb, buffer_delay_length_minus_1 + 1); // encoder_buffer_delay
+                    skip_bits1(&gb); // low_delay_mode_flag
+                }
+            }
+
+            if (initial_display_delay_present_flag) {
+                if (get_bits1(&gb)) // initial_display_delay_present_for_this_op
+                    skip_bits(&gb, 4); // initial_display_delay_minus_1
+            }
+
+            /* Only the first operating point is recorded */
+            if (i == 0) {
+               seq_params->seq_level_idx_0 = seq_level_idx;
+               seq_params->seq_tier_0 = seq_tier;
+            }
+        }
+    }
+
+    frame_width_bits_minus_1  = get_bits(&gb, 4);
+    frame_height_bits_minus_1 = get_bits(&gb, 4);
+
+    skip_bits(&gb, frame_width_bits_minus_1 + 1); // max_frame_width_minus_1
+    skip_bits(&gb, frame_height_bits_minus_1 + 1); // max_frame_height_minus_1
+
+    if (!reduced_still_picture_header) {
+        if (get_bits1(&gb)) // frame_id_numbers_present_flag
+            skip_bits(&gb, 7); // delta_frame_id_length_minus_2 (4), additional_frame_id_length_minus_1 (3)
+    }
+
+    skip_bits(&gb, 3); // use_128x128_superblock (1), enable_filter_intra (1), enable_intra_edge_filter (1)
+
+    if (!reduced_still_picture_header) {
+        int enable_order_hint, seq_force_screen_content_tools;
+
+        skip_bits(&gb, 4); // enable_interintra_compound (1), enable_masked_compound (1)
+                           // enable_warped_motion (1), enable_dual_filter (1)
+
+        enable_order_hint = get_bits1(&gb);
+        if (enable_order_hint)
+            skip_bits(&gb, 2); // enable_jnt_comp (1), enable_ref_frame_mvs (1)
+
+        if (get_bits1(&gb)) // seq_choose_screen_content_tools
+            seq_force_screen_content_tools = 2;
+        else
+            seq_force_screen_content_tools = get_bits1(&gb);
+
+        if (seq_force_screen_content_tools) {
+            if (!get_bits1(&gb)) // seq_choose_integer_mv
+                skip_bits1(&gb); // seq_force_integer_mv
+        }
+
+        if (enable_order_hint)
+            skip_bits(&gb, 3); // order_hint_bits_minus_1
+    }
+
+    skip_bits(&gb, 3); // enable_superres (1), enable_cdef (1), enable_restoration (1)
+
+    /* Do not drop the helper's status: propagate any error it reports */
+    ret = parse_color_config(seq_params, &gb);
+    if (ret < 0)
+        return ret;
+
+    skip_bits1(&gb); // film_grain_params_present
+
+    /* A negative remainder means the header was shorter than its syntax */
+    if (get_bits_left(&gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    return 0;
+}
+
 int ff_isom_write_av1c(AVIOContext *pb, const uint8_t *buf, int size)
 {
     AVIOContext *seq_pb = NULL, *meta_pb = NULL;
+    AV1SequenceParameters seq_params;
+    PutBitContext pbc;
+    uint8_t header[4];
     uint8_t *seq = NULL, *meta = NULL;
     int64_t obu_size;
     int start_pos, type, temporal_id, spatial_id;
@@ -106,6 +327,10 @@  int ff_isom_write_av1c(AVIOContext *pb, const uint8_t *buf, int size)
                 ret = AVERROR_INVALIDDATA;
                 goto fail;
             }
+            ret = parse_sequence_header(&seq_params, buf + start_pos, obu_size);
+            if (ret < 0)
+                goto fail;
+
             avio_write(seq_pb, buf, len);
             break;
         case AV1_OBU_METADATA:
@@ -127,6 +352,23 @@  int ff_isom_write_av1c(AVIOContext *pb, const uint8_t *buf, int size)
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }
+
+    /* Build the 4 fixed bytes that precede the OBUs in the av1C payload */
+    init_put_bits(&pbc, header, sizeof(header));
+
+    put_bits(&pbc, 1, 1); // marker
+    put_bits(&pbc, 7, 1); // version
+    put_bits(&pbc, 3, seq_params.seq_profile);
+    put_bits(&pbc, 5, seq_params.seq_level_idx_0);
+    put_bits(&pbc, 1, seq_params.seq_tier_0);
+    put_bits(&pbc, 1, seq_params.high_bitdepth);
+    put_bits(&pbc, 1, seq_params.twelve_bit);
+    put_bits(&pbc, 1, seq_params.monochrome);
+    put_bits(&pbc, 1, seq_params.chroma_subsampling_x);
+    put_bits(&pbc, 1, seq_params.chroma_subsampling_y);
+    put_bits(&pbc, 2, seq_params.chroma_sample_position);
+    /* The fields above add up to 24 bits, i.e. 3 of the 4 header bytes.
+     * Write the last byte explicitly so header[3] is never emitted
+     * uninitialized. */
+    put_bits(&pbc, 8, 0); // reserved (3), initial_presentation_delay_present (1), initial_presentation_delay_minus_one/reserved (4)
+    flush_put_bits(&pbc);
+
+    avio_write(pb, header, sizeof(header));
     avio_write(pb, seq, seq_size);
 
     meta_size = avio_close_dyn_buf(meta_pb, &meta);
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 1ded431b80..e6793988e1 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -2421,6 +2421,10 @@  static int matroska_parse_tracks(AVFormatContext *s)
             /* we don't need any value stored in CodecPrivate.
                make sure that it's not exported as extradata. */
             track->codec_priv.size = 0;
+        } else if (codec_id == AV_CODEC_ID_AV1 && track->codec_priv.size) {
+            /* For now, propagate only the OBUs, if any. Once libavcodec is
+               updated to handle isobmff style extradata this can be removed. */
+            extradata_offset = 4;
         }
         track->codec_priv.size -= extradata_offset;
 
diff --git a/libavformat/mov.c b/libavformat/mov.c
index c863047d79..7f3f972b37 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -5196,7 +5196,7 @@  static int mov_read_av1c(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams - 1];
 
-    if (atom.size < 5) {
+    if (atom.size < 8) {
         av_log(c->fc, AV_LOG_ERROR, "Empty AV1 Codec Configuration Box\n");
         return AVERROR_INVALIDDATA;
     }
@@ -5208,9 +5208,14 @@  static int mov_read_av1c(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     }
     avio_skip(pb, 3); /* flags */
 
-    avio_skip(pb, 1); /* reserved, initial_presentation_delay_present, initial_presentation_delay_minus_one */
+    /* For now, propagate only the OBUs, if any. Once libavcodec is
+       updated to handle isobmff style extradata this can be removed. */
+    avio_skip(pb, 4);
+
+    if (atom.size == 8)
+        return 0;
 
-    ret = ff_get_extradata(c->fc, st->codecpar, pb, atom.size - 5);
+    ret = ff_get_extradata(c->fc, st->codecpar, pb, atom.size - 8);
     if (ret < 0)
         return ret;
 
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index d530f40cab..441bdfba9d 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1172,7 +1172,6 @@  static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
     ffio_wfourcc(pb, "av1C");
     avio_w8(pb, 0); /* version */
     avio_wb24(pb, 0); /* flags */
-    avio_w8(pb, 0); /* reserved (3), initial_presentation_delay_present (1), initial_presentation_delay_minus_one/reserved (4) */
     ff_isom_write_av1c(pb, track->vos_data, track->vos_len);
     return update_size(pb, pos);
 }