diff mbox

[FFmpeg-devel,10/20] lavc: Add hevc_metadata bitstream filter

Message ID 20171015150447.18058-11-sw@jkqxz.net
State New
Headers show

Commit Message

Mark Thompson Oct. 15, 2017, 3:04 p.m. UTC
This is able to modify some header metadata found in the VPS/SPS/VUI,
and can also add/remove AUDs.

(cherry picked from commit b31a9eae0233325c4b382c657f4b687d5d8b0812)
---
 configure                      |   1 +
 doc/bitstream_filters.texi     |  54 +++++
 libavcodec/Makefile            |   1 +
 libavcodec/bitstream_filters.c |   1 +
 libavcodec/h265_metadata_bsf.c | 458 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 515 insertions(+)
 create mode 100644 libavcodec/h265_metadata_bsf.c
diff mbox

Patch

diff --git a/configure b/configure
index a92d6c6cc6..c909fbe4f9 100755
--- a/configure
+++ b/configure
@@ -2915,6 +2915,7 @@  vc1_parser_select="vc1dsp"
 # bitstream_filters
 h264_metadata_bsf_select="cbs_h264"
 h264_redundant_pps_bsf_select="cbs_h264"
+hevc_metadata_bsf_select="cbs_h265"
 mjpeg2jpeg_bsf_select="jpegtables"
 trace_headers_bsf_select="cbs_h264 cbs_h265 cbs_mpeg2"
 
diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
index 0e116a9c09..140954880c 100644
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi
@@ -183,6 +183,60 @@  confuse other transformations which require correct extradata.
 A new single global PPS is created, and all of the redundant PPSs
 within the stream are removed.
 
+@section hevc_metadata
+
+Modify metadata embedded in an HEVC stream.
+
+@table @option
+@item aud
+Insert or remove AUD NAL units in all access units of the stream.
+
+@table @samp
+@item insert
+@item remove
+@end table
+
+@item sample_aspect_ratio
+Set the sample aspect ratio in the stream in the VUI parameters.
+
+@item video_format
+@item video_full_range_flag
+Set the video format in the stream (see H.265 section E.3.1 and
+table E.2).
+
+@item colour_primaries
+@item transfer_characteristics
+@item matrix_coefficients
+Set the colour description in the stream (see H.265 section E.3.1
+and tables E.3, E.4 and E.5).
+
+@item chroma_sample_loc_type
+Set the chroma sample location in the stream (see H.265 section
+E.3.1 and figure E.1).
+
+@item tick_rate
+Set the tick rate in the VPS and VUI parameters (time_scale /
+num_units_in_tick).  Combined with @option{num_ticks_poc_diff_one}, this
+can set a constant framerate in the stream.  Note that it is likely to be
+overridden by container parameters when the stream is in a container.
+
+@item num_ticks_poc_diff_one
+Set poc_proportional_to_timing_flag in VPS and VUI and use this value
+to set num_ticks_poc_diff_one_minus1 (see H.265 sections 7.4.3.1 and
+E.3.1).  Ignored if @option{tick_rate} is not also set.
+
+@item crop_left
+@item crop_right
+@item crop_top
+@item crop_bottom
+Set the conformance window cropping offsets in the SPS.  These values
+will replace the current ones if the stream is already cropped.
+
+These fields are set in pixels.  Note that some sizes may not be
+representable if the chroma is subsampled (H.265 section 7.4.3.2.1).
+
+@end table
+
 @section hevc_mp4toannexb
 
 Convert an HEVC/H.265 bitstream from length prefixed mode to start code
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index a5b2b17cf3..8a68c1b929 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1018,6 +1018,7 @@  OBJS-$(CONFIG_EXTRACT_EXTRADATA_BSF)      += extract_extradata_bsf.o    \
 OBJS-$(CONFIG_H264_METADATA_BSF)          += h264_metadata_bsf.o
 OBJS-$(CONFIG_H264_MP4TOANNEXB_BSF)       += h264_mp4toannexb_bsf.o
 OBJS-$(CONFIG_H264_REDUNDANT_PPS_BSF)     += h264_redundant_pps_bsf.o
+OBJS-$(CONFIG_HEVC_METADATA_BSF)          += h265_metadata_bsf.o
 OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF)       += hevc_mp4toannexb_bsf.o
 OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF)        += imx_dump_header_bsf.o
 OBJS-$(CONFIG_MJPEG2JPEG_BSF)             += mjpeg2jpeg_bsf.o
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 5ab4f14642..6e6b894e7f 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -32,6 +32,7 @@  extern const AVBitStreamFilter ff_extract_extradata_bsf;
 extern const AVBitStreamFilter ff_h264_metadata_bsf;
 extern const AVBitStreamFilter ff_h264_mp4toannexb_bsf;
 extern const AVBitStreamFilter ff_h264_redundant_pps_bsf;
+extern const AVBitStreamFilter ff_hevc_metadata_bsf;
 extern const AVBitStreamFilter ff_hevc_mp4toannexb_bsf;
 extern const AVBitStreamFilter ff_imx_dump_header_bsf;
 extern const AVBitStreamFilter ff_mjpeg2jpeg_bsf;
diff --git a/libavcodec/h265_metadata_bsf.c b/libavcodec/h265_metadata_bsf.c
new file mode 100644
index 0000000000..9af5cc256f
--- /dev/null
+++ b/libavcodec/h265_metadata_bsf.c
@@ -0,0 +1,458 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "cbs.h"
+#include "cbs_h265.h"
+#include "hevc.h"
+
+enum {
+    PASS,   // Leave AUD NAL units as they are (default of the "aud" option).
+    INSERT, // Add an AUD at the front of access units that do not start with one.
+    REMOVE, // Delete an AUD found at the front of an access unit.
+};
+
+typedef struct H265MetadataContext {
+    const AVClass *class;                // AVClass pointer, kept as the first member.
+
+    CodedBitstreamContext *cbc;          // Created by ff_cbs_init() in init, released by ff_cbs_close() in close.
+    CodedBitstreamFragment access_unit;  // Scratch fragment reused for the extradata and for every packet.
+
+    H265RawAUD aud_nal;                  // Storage for the AUD passed to ff_cbs_insert_unit_content().
+
+    int aud;                             // "aud" option: PASS, INSERT or REMOVE.
+
+    AVRational sample_aspect_ratio;      // "sample_aspect_ratio" option; ignored while num or den is 0.
+
+    int video_format;                    // VUI overrides from here down to chroma_sample_loc_type:
+    int video_full_range_flag;           // applied only when >= 0; -1 (the option default) leaves
+    int colour_primaries;                // the stream value alone.
+    int transfer_characteristics;
+    int matrix_coefficients;
+
+    int chroma_sample_loc_type;          // Written to both the top-field and bottom-field VUI entries.
+
+    AVRational tick_rate;                // num is stored as time_scale, den as num_units_in_tick; ignored while either is 0.
+    int poc_proportional_to_timing_flag; // NOTE(review): no option maps here and the visible code never reads it.
+    int num_ticks_poc_diff_one;          // > 0: set flag and value; 0: clear flag; -1: leave both untouched.
+
+    int crop_left;                       // Conformance-window crop requests; applied only when >= 0.
+    int crop_right;
+    int crop_top;
+    int crop_bottom;
+} H265MetadataContext;
+
+
+/**
+ * Apply the requested timing overrides to a decomposed VPS.
+ *
+ * Nothing is changed unless the tick_rate option has a non-zero numerator
+ * and denominator.  When it does, the reduced ratio is stored as
+ * vps_time_scale / vps_num_units_in_tick and timing info is marked present.
+ * num_ticks_poc_diff_one then optionally sets (> 0) or clears (== 0) the
+ * POC-proportional-to-timing signalling; a negative value leaves it alone.
+ *
+ * @param bsf  filter context; bsf->priv_data holds the option values
+ * @param vps  VPS to modify in place
+ * @return always 0
+ */
+static int h265_metadata_update_vps(AVBSFContext *bsf,
+                                    H265RawVPS *vps)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+    int time_scale, units_in_tick;
+
+    // No usable tick rate requested: leave the VPS untouched.
+    if (!ctx->tick_rate.num || !ctx->tick_rate.den)
+        return 0;
+
+    av_reduce(&time_scale, &units_in_tick,
+              ctx->tick_rate.num, ctx->tick_rate.den,
+              UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
+
+    vps->vps_timing_info_present_flag = 1;
+    vps->vps_time_scale               = time_scale;
+    vps->vps_num_units_in_tick        = units_in_tick;
+
+    if (ctx->num_ticks_poc_diff_one == 0) {
+        vps->vps_poc_proportional_to_timing_flag = 0;
+    } else if (ctx->num_ticks_poc_diff_one > 0) {
+        vps->vps_poc_proportional_to_timing_flag = 1;
+        vps->vps_num_ticks_poc_diff_one_minus1 =
+            ctx->num_ticks_poc_diff_one - 1;
+    }
+
+    return 0;
+}
+
+/**
+ * Apply the requested overrides to a decomposed SPS and its VUI.
+ *
+ * Handled in order: sample aspect ratio, video signal type and colour
+ * description, chroma sample location, timing info, and the conformance
+ * window.  vui_parameters_present_flag is set if any VUI field was written.
+ *
+ * @param bsf  filter context; bsf->priv_data holds the option values
+ * @param sps  SPS to modify in place
+ * @return 0 on success, AVERROR(EINVAL) if a crop option is not a multiple
+ *         of the crop unit derived from the chroma format (the SPS may
+ *         already have been partially modified at that point)
+ */
+static int h265_metadata_update_sps(AVBSFContext *bsf,
+                                    H265RawSPS *sps)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+    int need_vui = 0;
+    int crop_unit_x, crop_unit_y;
+
+    if (ctx->sample_aspect_ratio.num && ctx->sample_aspect_ratio.den) {
+        // Table E-1.
+        static const AVRational sar_idc[] = {
+            {   0,  0 }, // Unspecified (never written here).
+            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
+            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
+            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
+            { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
+        };
+        int num, den, i;
+
+        av_reduce(&num, &den, ctx->sample_aspect_ratio.num,
+                  ctx->sample_aspect_ratio.den, 65535);
+
+        // Prefer a predefined table index; entry 0 is skipped on purpose.
+        for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
+            if (num == sar_idc[i].num &&
+                den == sar_idc[i].den)
+                break;
+        }
+        if (i == FF_ARRAY_ELEMS(sar_idc)) {
+            // No table match: signal the ratio explicitly with idc 255.
+            sps->vui.aspect_ratio_idc = 255;
+            sps->vui.sar_width  = num;
+            sps->vui.sar_height = den;
+        } else {
+            sps->vui.aspect_ratio_idc = i;
+        }
+        sps->vui.aspect_ratio_info_present_flag = 1;
+        need_vui = 1;
+    }
+
+    // Write the user value when one was given (>= 0); otherwise, if the
+    // enclosing syntax group was absent from the stream, fill in the given
+    // fallback so the newly written group carries a defined value.
+#define SET_OR_INFER(field, value, present_flag, infer) do { \
+        if (value >= 0) { \
+            field = value; \
+            need_vui = 1; \
+        } else if (!present_flag) \
+            field = infer; \
+    } while (0)
+
+    if (ctx->video_format             >= 0 ||
+        ctx->video_full_range_flag    >= 0 ||
+        ctx->colour_primaries         >= 0 ||
+        ctx->transfer_characteristics >= 0 ||
+        ctx->matrix_coefficients      >= 0) {
+
+        SET_OR_INFER(sps->vui.video_format, ctx->video_format,
+                     sps->vui.video_signal_type_present_flag, 5);
+
+        SET_OR_INFER(sps->vui.video_full_range_flag,
+                     ctx->video_full_range_flag,
+                     sps->vui.video_signal_type_present_flag, 0);
+
+        if (ctx->colour_primaries         >= 0 ||
+            ctx->transfer_characteristics >= 0 ||
+            ctx->matrix_coefficients      >= 0) {
+
+            SET_OR_INFER(sps->vui.colour_primaries,
+                         ctx->colour_primaries,
+                         sps->vui.colour_description_present_flag, 2);
+
+            SET_OR_INFER(sps->vui.transfer_characteristics,
+                         ctx->transfer_characteristics,
+                         sps->vui.colour_description_present_flag, 2);
+
+            SET_OR_INFER(sps->vui.matrix_coefficients,
+                         ctx->matrix_coefficients,
+                         sps->vui.colour_description_present_flag, 2);
+
+            sps->vui.colour_description_present_flag = 1;
+        }
+        sps->vui.video_signal_type_present_flag = 1;
+        need_vui = 1;
+    }
+    // Keep the helper local to this function, as is done for CROP below.
+#undef SET_OR_INFER
+
+    if (ctx->chroma_sample_loc_type >= 0) {
+        // The same location type is used for both fields.
+        sps->vui.chroma_sample_loc_type_top_field =
+            ctx->chroma_sample_loc_type;
+        sps->vui.chroma_sample_loc_type_bottom_field =
+            ctx->chroma_sample_loc_type;
+        sps->vui.chroma_loc_info_present_flag = 1;
+        need_vui = 1;
+    }
+
+    if (ctx->tick_rate.num && ctx->tick_rate.den) {
+        int num, den;
+
+        av_reduce(&num, &den, ctx->tick_rate.num, ctx->tick_rate.den,
+                  UINT32_MAX > INT_MAX ? UINT32_MAX : INT_MAX);
+
+        // tick_rate is stored as time_scale / num_units_in_tick.
+        sps->vui.vui_time_scale        = num;
+        sps->vui.vui_num_units_in_tick = den;
+
+        sps->vui.vui_timing_info_present_flag = 1;
+        need_vui = 1;
+
+        if (ctx->num_ticks_poc_diff_one > 0) {
+            sps->vui.vui_num_ticks_poc_diff_one_minus1 =
+                ctx->num_ticks_poc_diff_one - 1;
+            sps->vui.vui_poc_proportional_to_timing_flag = 1;
+        } else if (ctx->num_ticks_poc_diff_one == 0) {
+            sps->vui.vui_poc_proportional_to_timing_flag = 0;
+        }
+    }
+
+    // Crop offsets are coded in units that depend on chroma subsampling.
+    if (sps->separate_colour_plane_flag || sps->chroma_format_idc == 0) {
+        crop_unit_x = 1;
+        crop_unit_y = 1;
+    } else {
+        crop_unit_x = 1 + (sps->chroma_format_idc < 3);
+        crop_unit_y = 1 + (sps->chroma_format_idc < 2);
+    }
+    // Convert one crop option to crop units; returns from the enclosing
+    // function with EINVAL when the value is not representable.
+#define CROP(border, unit) do { \
+        if (ctx->crop_ ## border >= 0) { \
+            if (ctx->crop_ ## border % unit != 0) { \
+                av_log(bsf, AV_LOG_ERROR, "Invalid value for crop_%s: " \
+                       "must be a multiple of %d.\n", #border, unit); \
+                return AVERROR(EINVAL); \
+            } \
+            sps->conf_win_ ## border ## _offset = \
+                ctx->crop_ ## border / unit; \
+            sps->conformance_window_flag = 1; \
+        } \
+    } while (0)
+    CROP(left,   crop_unit_x);
+    CROP(right,  crop_unit_x);
+    CROP(top,    crop_unit_y);
+    CROP(bottom, crop_unit_y);
+#undef CROP
+
+    if (need_vui)
+        sps->vui_parameters_present_flag = 1;
+
+    return 0;
+}
+
+/**
+ * Filter one packet.
+ *
+ * Fetches the next input packet, decomposes it into NAL units, removes or
+ * inserts a leading AUD according to the "aud" option, rewrites every VPS
+ * and SPS found in the access unit, and reassembles the result into out
+ * with the input packet's properties copied over.
+ *
+ * @param bsf  filter context
+ * @param out  packet to receive the filtered data; unreferenced again if
+ *             any step fails, so the caller never gets data together with
+ *             an error code
+ * @return 0 on success, a negative error code otherwise (including
+ *         whatever ff_bsf_get_packet() reports when no input is available)
+ */
+static int h265_metadata_filter(AVBSFContext *bsf, AVPacket *out)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+    AVPacket *in = NULL;
+    CodedBitstreamFragment *au = &ctx->access_unit;
+    int err, i;
+
+    err = ff_bsf_get_packet(bsf, &in);
+    if (err < 0)
+        goto fail;
+
+    err = ff_cbs_read_packet(ctx->cbc, au, in);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
+        goto fail;
+    }
+
+    if (au->nb_units == 0) {
+        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
+        err = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    // If an AUD is present, it must be the first NAL unit.
+    if (au->units[0].type == HEVC_NAL_AUD) {
+        if (ctx->aud == REMOVE)
+            ff_cbs_delete_unit(ctx->cbc, au, 0);
+    } else {
+        if (ctx->aud == INSERT) {
+            H265RawAUD *aud = &ctx->aud_nal;
+            // temporal_id starts at 8 as a "not seen yet" value; the loop
+            // keeps the minimum found over all decomposed units.
+            // NOTE(review): if no unit has decomposed content it stays 8.
+            int pic_type = 0, temporal_id = 8, layer_id = 0;
+
+            for (i = 0; i < au->nb_units; i++) {
+                const H265RawNALUnitHeader *nal = au->units[i].content;
+                if (!nal)
+                    continue;
+                if (nal->nuh_temporal_id_plus1 < temporal_id + 1)
+                    temporal_id = nal->nuh_temporal_id_plus1 - 1;
+
+                if (au->units[i].type <= HEVC_NAL_RSV_VCL31) {
+                    const H265RawSlice *slice = au->units[i].content;
+                    layer_id = nal->nuh_layer_id;
+                    // pic_type only ever grows: 1 once a P slice is seen,
+                    // 2 once a B slice is seen.
+                    if (slice->header.slice_type == HEVC_SLICE_B &&
+                        pic_type < 2)
+                        pic_type = 2;
+                    if (slice->header.slice_type == HEVC_SLICE_P &&
+                        pic_type < 1)
+                        pic_type = 1;
+                }
+            }
+
+            aud->nal_unit_header = (H265RawNALUnitHeader) {
+                .nal_unit_type         = HEVC_NAL_AUD,
+                .nuh_layer_id          = layer_id,
+                .nuh_temporal_id_plus1 = temporal_id + 1,
+            };
+            aud->pic_type = pic_type;
+
+            err = ff_cbs_insert_unit_content(ctx->cbc, au,
+                                             0, HEVC_NAL_AUD, aud);
+            if (err) {
+                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
+                goto fail;
+            }
+        }
+    }
+
+    for (i = 0; i < au->nb_units; i++) {
+        if (au->units[i].type == HEVC_NAL_VPS) {
+            err = h265_metadata_update_vps(bsf, au->units[i].content);
+            if (err < 0)
+                goto fail;
+        }
+        if (au->units[i].type == HEVC_NAL_SPS) {
+            err = h265_metadata_update_sps(bsf, au->units[i].content);
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    err = ff_cbs_write_packet(ctx->cbc, out, au);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
+        goto fail;
+    }
+
+    err = av_packet_copy_props(out, in);
+    if (err < 0)
+        goto fail;
+
+    err = 0;
+fail:
+    ff_cbs_fragment_uninit(ctx->cbc, au);
+
+    // A later step may fail after the payload was already written to out;
+    // release it so an error return never leaves data in the packet.
+    if (err < 0)
+        av_packet_unref(out);
+    av_packet_free(&in);
+
+    return err;
+}
+
+/**
+ * Initialise the filter.
+ *
+ * Creates the coded bitstream context and, when the input parameters
+ * carry extradata, applies the VPS/SPS overrides to it and writes the
+ * result to the output parameters.
+ *
+ * @param bsf  filter context
+ * @return 0 on success, a negative error code otherwise
+ */
+static int h265_metadata_init(AVBSFContext *bsf)
+{
+    H265MetadataContext *ctx = bsf->priv_data;
+    CodedBitstreamFragment *frag = &ctx->access_unit;
+    int ret, n;
+
+    ret = ff_cbs_init(&ctx->cbc, AV_CODEC_ID_HEVC, bsf);
+    if (ret < 0)
+        return ret;
+
+    // Without extradata there is nothing to rewrite up front.
+    if (!bsf->par_in->extradata) {
+        ret = 0;
+        goto done;
+    }
+
+    ret = ff_cbs_read_extradata(ctx->cbc, frag, bsf->par_in);
+    if (ret < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
+        goto done;
+    }
+
+    for (n = 0; n < frag->nb_units; n++) {
+        // Only parameter sets that the options can affect are touched;
+        // for any other unit ret keeps its previous non-negative value.
+        switch (frag->units[n].type) {
+        case HEVC_NAL_VPS:
+            ret = h265_metadata_update_vps(bsf, frag->units[n].content);
+            break;
+        case HEVC_NAL_SPS:
+            ret = h265_metadata_update_sps(bsf, frag->units[n].content);
+            break;
+        default:
+            break;
+        }
+        if (ret < 0)
+            goto done;
+    }
+
+    ret = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, frag);
+    if (ret < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
+        goto done;
+    }
+
+    ret = 0;
+done:
+    // The fragment is scratch space; it is reset on every exit path.
+    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    return ret;
+}
+
+/** Release the coded bitstream context held in the filter's private data. */
+static void h265_metadata_close(AVBSFContext *bsf)
+{
+    H265MetadataContext *priv = bsf->priv_data;
+
+    ff_cbs_close(&priv->cbc);
+}
+
+// Option table for the filter.  Every entry writes into H265MetadataContext
+// at OFFSET(field).  Integer options default to -1, which the update
+// functions treat as "leave the stream value unchanged"; rational options
+// default to 0, which disables them.
+#define OFFSET(x) offsetof(H265MetadataContext, x)
+static const AVOption h265_metadata_options[] = {
+    { "aud", "Access Unit Delimiter NAL units",
+        OFFSET(aud), AV_OPT_TYPE_INT,
+        { .i64 = PASS }, PASS, REMOVE, 0, "aud" },
+    { "pass",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS   }, .unit = "aud" },
+    { "insert", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT }, .unit = "aud" },
+    { "remove", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE }, .unit = "aud" },
+
+    { "sample_aspect_ratio", "Set sample aspect ratio (table E-1)",
+        OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, 65535 },
+
+    { "video_format", "Set video format (table E-2)",
+        OFFSET(video_format), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 7 },
+    { "video_full_range_flag", "Set video full range flag",
+        OFFSET(video_full_range_flag), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 1 },
+    { "colour_primaries", "Set colour primaries (table E-3)",
+        OFFSET(colour_primaries), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255 },
+    { "transfer_characteristics", "Set transfer characteristics (table E-4)",
+        OFFSET(transfer_characteristics), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255 },
+    { "matrix_coefficients", "Set matrix coefficients (table E-5)",
+        OFFSET(matrix_coefficients), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 255 },
+
+    { "chroma_sample_loc_type", "Set chroma sample location type (figure E-1)",
+        OFFSET(chroma_sample_loc_type), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, 6 },
+
+    // The numerator is stored as time_scale and the denominator as
+    // num_units_in_tick, so the help text states the ratio in that order.
+    { "tick_rate",
+        "Set VPS and VUI tick rate (time_scale / num_units_in_tick)",
+        OFFSET(tick_rate), AV_OPT_TYPE_RATIONAL,
+        { .dbl = 0.0 }, 0, UINT_MAX },
+    { "num_ticks_poc_diff_one",
+        "Set VPS and VUI number of ticks per POC increment",
+        OFFSET(num_ticks_poc_diff_one), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, INT_MAX },
+
+    { "crop_left", "Set left border crop offset",
+        OFFSET(crop_left), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, HEVC_MAX_WIDTH },
+    { "crop_right", "Set right border crop offset",
+        OFFSET(crop_right), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, HEVC_MAX_WIDTH },
+    { "crop_top", "Set top border crop offset",
+        OFFSET(crop_top), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, HEVC_MAX_HEIGHT },
+    { "crop_bottom", "Set bottom border crop offset",
+        OFFSET(crop_bottom), AV_OPT_TYPE_INT,
+        { .i64 = -1 }, -1, HEVC_MAX_HEIGHT },
+
+    { NULL }
+};
+
+static const AVClass h265_metadata_class = { // Referenced by ff_hevc_metadata_bsf.priv_class.
+    .class_name = "h265_metadata_bsf",
+    .item_name  = av_default_item_name,
+    .option     = h265_metadata_options,     // Option table defined above.
+    .version    = LIBAVCODEC_VERSION_MAJOR,
+};
+
+static const enum AVCodecID h265_metadata_codec_ids[] = { // Codec list handed to the filter definition via .codec_ids.
+    AV_CODEC_ID_HEVC, AV_CODEC_ID_NONE, // HEVC only; AV_CODEC_ID_NONE is presumably the list terminator (confirm).
+};
+
+// Public filter definition, declared extern in bitstream_filters.c.
+const AVBitStreamFilter ff_hevc_metadata_bsf = {
+    .name           = "hevc_metadata",
+    .codec_ids      = h265_metadata_codec_ids,
+    .priv_class     = &h265_metadata_class,
+    .priv_data_size = sizeof(H265MetadataContext),
+    .init           = h265_metadata_init,
+    .filter         = h265_metadata_filter,
+    .close          = h265_metadata_close,
+};