diff mbox

[FFmpeg-devel,v2] avcodec: add AV1 frame split bitstream filter

Message ID 20190325152931.11464-1-jamrial@gmail.com
State Superseded
Headers show

Commit Message

James Almer March 25, 2019, 3:29 p.m. UTC
This will be needed by the eventual native AV1 decoder.

Signed-off-by: James Almer <jamrial@gmail.com>
---
Updated after recent changes to the CBS fragment handling API.

 configure                        |   1 +
 libavcodec/Makefile              |   1 +
 libavcodec/av1_frame_split_bsf.c | 248 +++++++++++++++++++++++++++++++
 libavcodec/bitstream_filters.c   |   1 +
 4 files changed, 251 insertions(+)
 create mode 100644 libavcodec/av1_frame_split_bsf.c

Comments

James Almer March 25, 2019, 4:42 p.m. UTC | #1
On 3/25/2019 12:29 PM, James Almer wrote:
> +        ret = ff_cbs_read_packet(s->cbc, td, s->buffer_pkt);
> +        if (ret < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
> +            return ret;

> +    if (split) {
> +        AV1RawFrameHeader *frame = NULL;
> +        int cur_frame_type = -1, size = 0;
> +
> +        for (i = s->cur_frame_idx; i < td->nb_units; i++) {
> +            CodedBitstreamUnit *unit = &td->units[i];
> +
> +            size += unit->data_size;
> +            if (unit->type == AV1_OBU_FRAME) {
> +                AV1RawOBU *obu = unit->content;
> +
> +                if (frame) {
> +                    ret = AVERROR_INVALIDDATA;
> +                    goto fail;

> +            } else if (unit->type == AV1_OBU_FRAME_HEADER) {
> +                AV1RawOBU *obu = unit->content;
> +
> +                if (frame) {
> +                    ret = AVERROR_INVALIDDATA;
> +                    goto fail;

> +            } else if (unit->type == AV1_OBU_TILE_GROUP) {
> +                AV1RawOBU *obu = unit->content;
> +                AV1RawTileGroup *group = &obu->obu.tile_group;
> +
> +                if (!frame || cur_frame_type != AV1_OBU_FRAME_HEADER) {
> +                    ret = AVERROR_INVALIDDATA;
> +                    goto fail;

I'm not sure if we should abort and discard the packet in these cases,
or just pass it through.

In all of these cases the Temporal Unit is invalid in some form (orphaned
Tile Group OBUs, Frame Headers showing up when the Tile Groups from a
previous Frame Header were expected, etc.), but maybe it should be left
to the decoder to decide what to do with them.

> +fail:
> +    if (ret < 0)
> +        av_packet_unref(out);
> +    av_packet_unref(s->buffer_pkt);
> +    ff_cbs_fragment_reset(s->cbc, td);
> +
> +    return ret;
> +}
Ronald S. Bultje March 25, 2019, 7:30 p.m. UTC | #2
Hi,

On Mon, Mar 25, 2019 at 12:42 PM James Almer <jamrial@gmail.com> wrote:

> On 3/25/2019 12:29 PM, James Almer wrote:
> > +        ret = ff_cbs_read_packet(s->cbc, td, s->buffer_pkt);
> > +        if (ret < 0) {
> > +            av_log(ctx, AV_LOG_ERROR, "Failed to parse temporal
> unit.\n");
> > +            return ret;
>
> > +    if (split) {
> > +        AV1RawFrameHeader *frame = NULL;
> > +        int cur_frame_type = -1, size = 0;
> > +
> > +        for (i = s->cur_frame_idx; i < td->nb_units; i++) {
> > +            CodedBitstreamUnit *unit = &td->units[i];
> > +
> > +            size += unit->data_size;
> > +            if (unit->type == AV1_OBU_FRAME) {
> > +                AV1RawOBU *obu = unit->content;
> > +
> > +                if (frame) {
> > +                    ret = AVERROR_INVALIDDATA;
> > +                    goto fail;
>
> > +            } else if (unit->type == AV1_OBU_FRAME_HEADER) {
> > +                AV1RawOBU *obu = unit->content;
> > +
> > +                if (frame) {
> > +                    ret = AVERROR_INVALIDDATA;
> > +                    goto fail;
>
> > +            } else if (unit->type == AV1_OBU_TILE_GROUP) {
> > +                AV1RawOBU *obu = unit->content;
> > +                AV1RawTileGroup *group = &obu->obu.tile_group;
> > +
> > +                if (!frame || cur_frame_type != AV1_OBU_FRAME_HEADER) {
> > +                    ret = AVERROR_INVALIDDATA;
> > +                    goto fail;
>
> I'm not sure if we should abort and discard the packet in these cases,
> or just pass it through.
>
> In all these the Temporal Unit is invalid in some form (Orphaned Tile
> Group OBUs, Frame Headers showing up when the Tile Groups from a
> previous Frame Header were expected, etc), but maybe it should be left
> to the decoder to decide what to do with them.


Is cur_frame_type the previous OBU's type? Is something like
framehdr-highdynamicrangeframedata-tiledata valid?

Ronald
James Almer March 25, 2019, 7:58 p.m. UTC | #3
On 3/25/2019 4:30 PM, Ronald S. Bultje wrote:
> Hi,
> 
> On Mon, Mar 25, 2019 at 12:42 PM James Almer <jamrial@gmail.com> wrote:
> 
>> On 3/25/2019 12:29 PM, James Almer wrote:
>>> +        ret = ff_cbs_read_packet(s->cbc, td, s->buffer_pkt);
>>> +        if (ret < 0) {
>>> +            av_log(ctx, AV_LOG_ERROR, "Failed to parse temporal
>> unit.\n");
>>> +            return ret;
>>
>>> +    if (split) {
>>> +        AV1RawFrameHeader *frame = NULL;
>>> +        int cur_frame_type = -1, size = 0;
>>> +
>>> +        for (i = s->cur_frame_idx; i < td->nb_units; i++) {
>>> +            CodedBitstreamUnit *unit = &td->units[i];
>>> +
>>> +            size += unit->data_size;
>>> +            if (unit->type == AV1_OBU_FRAME) {
>>> +                AV1RawOBU *obu = unit->content;
>>> +
>>> +                if (frame) {
>>> +                    ret = AVERROR_INVALIDDATA;
>>> +                    goto fail;
>>
>>> +            } else if (unit->type == AV1_OBU_FRAME_HEADER) {
>>> +                AV1RawOBU *obu = unit->content;
>>> +
>>> +                if (frame) {
>>> +                    ret = AVERROR_INVALIDDATA;
>>> +                    goto fail;
>>
>>> +            } else if (unit->type == AV1_OBU_TILE_GROUP) {
>>> +                AV1RawOBU *obu = unit->content;
>>> +                AV1RawTileGroup *group = &obu->obu.tile_group;
>>> +
>>> +                if (!frame || cur_frame_type != AV1_OBU_FRAME_HEADER) {
>>> +                    ret = AVERROR_INVALIDDATA;
>>> +                    goto fail;
>>
>> I'm not sure if we should abort and discard the packet in these cases,
>> or just pass it through.
>>
>> In all these the Temporal Unit is invalid in some form (Orphaned Tile
>> Group OBUs, Frame Headers showing up when the Tile Groups from a
>> previous Frame Header were expected, etc), but maybe it should be left
>> to the decoder to decide what to do with them.
> 
> 
> Is cur_frame_type the previous OBU's type?

cur_frame_type is set to either Frame or Frame Header based on the last
of such OBU type seen, and checked when parsing a Tile Group OBU in
order to abort if it was not a Frame Header (Tile Group OBUs must come
after a Frame Header OBU).

I only added it because, in the case where I want to include all the
trailing OBUs when the last frame is of the Frame OBU kind, if one of said
trailing OBUs is an orphaned Tile Group then it means the TU is broken.

> Is something like framehdr-highdynamicrangeframedata-tiledata valid?

I don't know. The spec says:

"A coded video sequence consists of one or more temporal units. A
temporal unit consists of a series of OBUs starting from a temporal
delimiter, optional sequence headers, optional metadata OBUs, a sequence
of one or more frame headers, each followed by zero or more tile group
OBUs as well as optional padding OBUs."

It would seem it's not valid, or at least not expected, but nothing in
practice should prevent decoding with such ordering.
It doesn't seem to be too strict about what's meant to be after a Frame
Header other than its Tile Groups (the sentence is lacking the usual
must/shall/should wording), but either way this bsf will handle that
just fine. It makes sure to split starting from a Frame Header all the
way to the last Tile Group for the frame, including everything in between,
and everything after if it's the last frame. It doesn't do any reordering.
diff mbox

Patch

diff --git a/configure b/configure
index 331393f8d5..ed33490797 100755
--- a/configure
+++ b/configure
@@ -3076,6 +3076,7 @@  vc1_parser_select="vc1dsp"

 # bitstream_filters
 aac_adtstoasc_bsf_select="adts_header"
+av1_frame_split_bsf_select="cbs_av1"
 av1_metadata_bsf_select="cbs_av1"
 eac3_core_bsf_select="ac3_parser"
 filter_units_bsf_select="cbs"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 15c43a8a6a..27f326247d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1065,6 +1065,7 @@  OBJS-$(CONFIG_XMA_PARSER)              += xma_parser.o
 # bitstream filters
 OBJS-$(CONFIG_AAC_ADTSTOASC_BSF)          += aac_adtstoasc_bsf.o mpeg4audio.o
 OBJS-$(CONFIG_AV1_METADATA_BSF)           += av1_metadata_bsf.o
+OBJS-$(CONFIG_AV1_FRAME_SPLIT_BSF)        += av1_frame_split_bsf.o
 OBJS-$(CONFIG_CHOMP_BSF)                  += chomp_bsf.o
 OBJS-$(CONFIG_DUMP_EXTRADATA_BSF)         += dump_extradata_bsf.o
 OBJS-$(CONFIG_DCA_CORE_BSF)               += dca_core_bsf.o
diff --git a/libavcodec/av1_frame_split_bsf.c b/libavcodec/av1_frame_split_bsf.c
new file mode 100644
index 0000000000..2ff58e464f
--- /dev/null
+++ b/libavcodec/av1_frame_split_bsf.c
@@ -0,0 +1,246 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * This bitstream filter splits AV1 Temporal Units into packets containing
+ * just one frame.
+ */
+
+#include "libavutil/avassert.h"
+
+#include "avcodec.h"
+#include "bsf.h"
+#include "cbs.h"
+#include "cbs_av1.h"
+
+/**
+ * Private state of the av1_frame_split bitstream filter.
+ */
+typedef struct AV1FSplitContext {
+    AVPacket *buffer_pkt;                  ///< input packet kept while its frames are being output
+    CodedBitstreamContext *cbc;            ///< CBS context used to parse the input
+    CodedBitstreamFragment temporal_unit;  ///< units parsed from buffer_pkt
+
+    int nb_frames;       ///< Frame and Frame Header OBUs counted in the buffered packet
+    int cur_frame;       ///< how many of those OBUs have been reached so far
+    int cur_frame_idx;   ///< index of the unit where the next scan starts
+    int last_frame_idx;  ///< index of the first unit of the packet being output
+} AV1FSplitContext;
+
+/**
+ * Output the next packet: either one frame split out of the buffered
+ * Temporal Unit, or the whole input packet untouched.
+ *
+ * A new input packet is only fetched when s->buffer_pkt holds no data, i.e.
+ * once the previous Temporal Unit has been fully output. Input with fewer
+ * than two Frame / Frame Header OBUs, or containing a Tile List OBU, is
+ * passed through unchanged.
+ *
+ * @param ctx bitstream filter context; its priv_data is an AV1FSplitContext
+ * @param out packet to fill
+ * @return 0 on success, a negative error code on failure. After a parsing or
+ *         splitting failure the buffered packet and the parsed fragment are
+ *         discarded.
+ */
+static int av1_frame_split_filter(AVBSFContext *ctx, AVPacket *out)
+{
+    AV1FSplitContext *s = ctx->priv_data;
+    CodedBitstreamFragment *td = &s->temporal_unit;
+    int i, ret;
+    // A packet that is still buffered has frames left to be output.
+    int split = !!s->buffer_pkt->data;
+
+    if (!s->buffer_pkt->data) {
+        int nb_frames = 0;
+
+        ret = ff_bsf_get_packet_ref(ctx, s->buffer_pkt);
+        if (ret < 0)
+            return ret;
+
+        ret = ff_cbs_read_packet(s->cbc, td, s->buffer_pkt);
+        if (ret < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to parse temporal unit.\n");
+            // Drop the buffered packet as well: leaving it in place would
+            // make the next call resume splitting a Temporal Unit that was
+            // never successfully parsed.
+            goto fail;
+        }
+
+        // Count the frames to decide whether there is anything to split.
+        for (i = 0; i < td->nb_units; i++) {
+            CodedBitstreamUnit *unit = &td->units[i];
+
+            if (unit->type == AV1_OBU_FRAME ||
+                unit->type == AV1_OBU_FRAME_HEADER)
+                nb_frames++;
+            else if (unit->type == AV1_OBU_TILE_LIST) {
+                av_log(ctx, AV_LOG_VERBOSE, "Large scale tiles are unsupported. Skipping Temporal Unit.\n");
+                nb_frames = 0;
+                break;
+            }
+        }
+        if (nb_frames > 1) {
+            s->cur_frame = 0;
+            // Don't attach the Temporal Delimiter OBU with the first frame
+            s->cur_frame_idx = s->last_frame_idx =
+                td->units[0].type == AV1_OBU_TEMPORAL_DELIMITER;
+            s->nb_frames = nb_frames;
+            split = 1;
+        }
+    }
+
+    if (split) {
+        AV1RawFrameHeader *frame = NULL;
+        int cur_frame_type = -1, size = 0;
+
+        // Walk the units from where the previous call stopped, adding up
+        // their sizes until the end of the current frame is found.
+        for (i = s->cur_frame_idx; i < td->nb_units; i++) {
+            CodedBitstreamUnit *unit = &td->units[i];
+
+            size += unit->data_size;
+            if (unit->type == AV1_OBU_FRAME) {
+                AV1RawOBU *obu = unit->content;
+
+                // A second frame showing up before the current one ended
+                if (frame) {
+                    ret = AVERROR_INVALIDDATA;
+                    goto fail;
+                }
+
+                frame = &obu->obu.frame.header;
+                cur_frame_type = obu->header.obu_type;
+                s->last_frame_idx = s->cur_frame_idx;
+                s->cur_frame_idx  = i + 1;
+                s->cur_frame++;
+
+                // split here unless it's the last frame, in which case
+                // include every trailing OBU
+                if (s->cur_frame < s->nb_frames)
+                    break;
+            } else if (unit->type == AV1_OBU_FRAME_HEADER) {
+                AV1RawOBU *obu = unit->content;
+
+                // A second frame showing up before the current one ended
+                if (frame) {
+                    ret = AVERROR_INVALIDDATA;
+                    goto fail;
+                }
+
+                frame = &obu->obu.frame_header;
+                cur_frame_type = obu->header.obu_type;
+                s->last_frame_idx = s->cur_frame_idx;
+                s->cur_frame++;
+
+                // split here if show_existing_frame unless it's the last
+                // frame, in which case include every trailing OBU
+                if (frame->show_existing_frame &&
+                    s->cur_frame < s->nb_frames) {
+                    s->cur_frame_idx = i + 1;
+                    break;
+                }
+            } else if (unit->type == AV1_OBU_TILE_GROUP) {
+                AV1RawOBU *obu = unit->content;
+                AV1RawTileGroup *group = &obu->obu.tile_group;
+
+                // Tile Groups are only accepted after a Frame Header OBU
+                if (!frame || cur_frame_type != AV1_OBU_FRAME_HEADER) {
+                    ret = AVERROR_INVALIDDATA;
+                    goto fail;
+                }
+
+                // tg_end reaching the last tile index marks the end of the frame
+                if ((group->tg_end == (frame->tile_cols * frame->tile_rows) - 1) &&
+                    // include every trailing OBU with the last frame
+                    s->cur_frame < s->nb_frames) {
+                    s->cur_frame_idx = i + 1;
+                    break;
+                }
+            }
+        }
+        av_assert0(frame && s->cur_frame <= s->nb_frames);
+
+        ret = av_packet_ref(out, s->buffer_pkt);
+        if (ret < 0)
+            goto fail;
+
+        // Narrow the output packet down to the units that were just scanned.
+        out->data = (uint8_t *)td->units[s->last_frame_idx].data;
+        out->size = size;
+
+        // Frames that are not shown carry no presentation timestamp.
+        if (!frame->show_existing_frame && !frame->show_frame)
+            out->pts = AV_NOPTS_VALUE;
+
+        // Last frame output: release the buffered packet so that the next
+        // call fetches new input.
+        if (s->cur_frame == s->nb_frames) {
+            av_packet_unref(s->buffer_pkt);
+            ff_cbs_fragment_reset(s->cbc, td);
+        }
+    } else {
+        // Nothing to split: hand the input packet over as is.
+        av_packet_move_ref(out, s->buffer_pkt);
+        ff_cbs_fragment_reset(s->cbc, td);
+    }
+
+    return 0;
+
+fail:
+    if (ret < 0)
+        av_packet_unref(out);
+    av_packet_unref(s->buffer_pkt);
+    ff_cbs_fragment_reset(s->cbc, td);
+
+    return ret;
+}
+
+/*
+ * OBU types installed as the CBS context's decompose_unit_types list by
+ * av1_frame_split_init(). Presumably these are the only unit types CBS
+ * fully parses -- confirm against cbs.h.
+ */
+static const CodedBitstreamUnitType decompose_unit_types[] = {
+    AV1_OBU_TEMPORAL_DELIMITER,
+    AV1_OBU_SEQUENCE_HEADER,
+    AV1_OBU_FRAME_HEADER,
+    AV1_OBU_TILE_GROUP,
+    AV1_OBU_FRAME,
+};
+
+/**
+ * Set up the filter: allocate the buffering packet, create the AV1 CBS
+ * context and, when the input parameters carry extradata, run it through
+ * the parser.
+ *
+ * @param ctx bitstream filter context; its priv_data is an AV1FSplitContext
+ * @return 0 on success, AVERROR(ENOMEM) if the packet cannot be allocated,
+ *         or the error returned by ff_cbs_init()
+ */
+static int av1_frame_split_init(AVBSFContext *ctx)
+{
+    AV1FSplitContext *priv = ctx->priv_data;
+    int err;
+
+    priv->buffer_pkt = av_packet_alloc();
+    if (!priv->buffer_pkt)
+        return AVERROR(ENOMEM);
+
+    err = ff_cbs_init(&priv->cbc, AV_CODEC_ID_AV1, ctx);
+    if (err < 0)
+        return err;
+
+    priv->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(decompose_unit_types);
+    priv->cbc->decompose_unit_types    = (CodedBitstreamUnitType*)decompose_unit_types;
+
+    if (ctx->par_in->extradata_size) {
+        // A parsing failure here is only reported, it does not fail init.
+        if (ff_cbs_read_extradata(priv->cbc, &priv->temporal_unit, ctx->par_in) < 0)
+            av_log(ctx, AV_LOG_WARNING, "Failed to parse extradata.\n");
+
+        ff_cbs_fragment_reset(priv->cbc, &priv->temporal_unit);
+    }
+
+    return 0;
+}
+
+/**
+ * Discard the buffered packet and reset the parsed fragment.
+ *
+ * @param ctx bitstream filter context; its priv_data is an AV1FSplitContext
+ */
+static void av1_frame_split_flush(AVBSFContext *ctx)
+{
+    AV1FSplitContext *priv = ctx->priv_data;
+    CodedBitstreamFragment *frag = &priv->temporal_unit;
+
+    av_packet_unref(priv->buffer_pkt);
+    ff_cbs_fragment_reset(priv->cbc, frag);
+}
+
+/**
+ * Release everything held by the filter: the buffering packet, the parsed
+ * fragment and the CBS context, in that order.
+ *
+ * @param ctx bitstream filter context; its priv_data is an AV1FSplitContext
+ */
+static void av1_frame_split_close(AVBSFContext *ctx)
+{
+    AV1FSplitContext *priv = ctx->priv_data;
+    CodedBitstreamFragment *frag = &priv->temporal_unit;
+
+    av_packet_free(&priv->buffer_pkt);
+    ff_cbs_fragment_free(priv->cbc, frag);
+    ff_cbs_close(&priv->cbc);
+}
+
+/* Codec IDs referenced by the filter definition: AV1, then the AV_CODEC_ID_NONE entry closing the list. */
+static const enum AVCodecID av1_frame_split_codec_ids[] = {
+    AV_CODEC_ID_AV1, AV_CODEC_ID_NONE,
+};
+
+/* Filter definition; declared extern in bitstream_filters.c. */
+const AVBitStreamFilter ff_av1_frame_split_bsf = {
+    .name           = "av1_frame_split",
+    .priv_data_size = sizeof(AV1FSplitContext),
+    .init           = av1_frame_split_init,
+    .flush          = av1_frame_split_flush,
+    .close          = av1_frame_split_close,
+    .filter         = av1_frame_split_filter,
+    .codec_ids      = av1_frame_split_codec_ids,
+};
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 2c999d3c1d..463003966a 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -25,6 +25,7 @@ 
 #include "bsf.h"

 extern const AVBitStreamFilter ff_aac_adtstoasc_bsf;
+extern const AVBitStreamFilter ff_av1_frame_split_bsf;
 extern const AVBitStreamFilter ff_av1_metadata_bsf;
 extern const AVBitStreamFilter ff_chomp_bsf;
 extern const AVBitStreamFilter ff_dump_extradata_bsf;