diff mbox series

[FFmpeg-devel,8/8] avcodec/bsf/dovi_rpu: add new bitstream filter

Message ID 20240618194221.26073-9-ffmpeg@haasn.xyz
State New
Headers show
Series DoVi metadata compression | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Niklas Haas June 18, 2024, 7:35 p.m. UTC
From: Niklas Haas <git@haasn.dev>

This can be used to strip dovi metadata, or enable/disable dovi
metadata compression. Possibly more use cases in the future.
---
 configure                      |   1 +
 doc/bitstream_filters.texi     |  21 +++
 libavcodec/bitstream_filters.c |   1 +
 libavcodec/bsf/Makefile        |   1 +
 libavcodec/bsf/dovi_rpu.c      | 258 +++++++++++++++++++++++++++++++++
 5 files changed, 282 insertions(+)
 create mode 100644 libavcodec/bsf/dovi_rpu.c
diff mbox series

Patch

diff --git a/configure b/configure
index 95565994fe..b432a1c11c 100755
--- a/configure
+++ b/configure
@@ -3437,6 +3437,7 @@  aac_adtstoasc_bsf_select="adts_header mpeg4audio"
 av1_frame_merge_bsf_select="cbs_av1"
 av1_frame_split_bsf_select="cbs_av1"
 av1_metadata_bsf_select="cbs_av1"
+dovi_rpu_bsf_select="cbs_h265 cbs_av1 dovi_rpudec dovi_rpuenc"
 dts2pts_bsf_select="cbs_h264 h264parse"
 eac3_core_bsf_select="ac3_parser"
 evc_frame_merge_bsf_select="evcparse"
diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
index c03f04f858..918735e8c5 100644
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi
@@ -101,6 +101,27 @@  Remove zero padding at the end of a packet.
 Extract the core from a DCA/DTS stream, dropping extensions such as
 DTS-HD.
 
+@section dovi_rpu
+
+Manipulate Dolby Vision metadata in an HEVC or AV1 bitstream, optionally
+enabling metadata compression.
+
+@table @option
+@item strip
+If enabled, strip all Dolby Vision metadata (configuration record + RPU data
+blocks) from the stream.
+@item compression
+A bit mask of compression methods to enable.
+@table @samp
+@item none
+No compression. Selected automatically for keyframes.
+@item vdr
+Compress VDR metadata (color reshaping / data mapping parameters).
+@item all
+Enable all implemented compression methods. This is the default.
+@end table
+@end table
+
 @section dump_extra
 
 Add extradata to the beginning of the filtered packets except when
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 138246c50e..f923411bee 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -31,6 +31,7 @@  extern const FFBitStreamFilter ff_av1_metadata_bsf;
 extern const FFBitStreamFilter ff_chomp_bsf;
 extern const FFBitStreamFilter ff_dump_extradata_bsf;
 extern const FFBitStreamFilter ff_dca_core_bsf;
+extern const FFBitStreamFilter ff_dovi_rpu_bsf;
 extern const FFBitStreamFilter ff_dts2pts_bsf;
 extern const FFBitStreamFilter ff_dv_error_marker_bsf;
 extern const FFBitStreamFilter ff_eac3_core_bsf;
diff --git a/libavcodec/bsf/Makefile b/libavcodec/bsf/Makefile
index fb70ad0c21..40b7fc6e9b 100644
--- a/libavcodec/bsf/Makefile
+++ b/libavcodec/bsf/Makefile
@@ -19,6 +19,7 @@  OBJS-$(CONFIG_H264_MP4TOANNEXB_BSF)       += bsf/h264_mp4toannexb.o
 OBJS-$(CONFIG_H264_REDUNDANT_PPS_BSF)     += bsf/h264_redundant_pps.o
 OBJS-$(CONFIG_HAPQA_EXTRACT_BSF)          += bsf/hapqa_extract.o
 OBJS-$(CONFIG_HEVC_METADATA_BSF)          += bsf/h265_metadata.o
+OBJS-$(CONFIG_DOVI_RPU_BSF)               += bsf/dovi_rpu.o
 OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF)       += bsf/hevc_mp4toannexb.o
 OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF)        += bsf/imx_dump_header.o
 OBJS-$(CONFIG_MEDIA100_TO_MJPEGB_BSF)     += bsf/media100_to_mjpegb.o
diff --git a/libavcodec/bsf/dovi_rpu.c b/libavcodec/bsf/dovi_rpu.c
new file mode 100644
index 0000000000..c57c3d87dd
--- /dev/null
+++ b/libavcodec/bsf/dovi_rpu.c
@@ -0,0 +1,258 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "cbs.h"
+#include "cbs_bsf.h"
+#include "cbs_av1.h"
+#include "cbs_h265.h"
+#include "dovi_rpu.h"
+#include "h2645data.h"
+#include "h265_profile_level.h"
+#include "itut35.h"
+
+#include "hevc/hevc.h"
+
+/**
+ * Private state of the dovi_rpu bitstream filter.
+ */
+typedef struct DoviRpuContext {
+    /* State for the generic CBS BSF helpers.
+     * NOTE(review): presumably has to remain the first member -- confirm. */
+    CBSBSFContext common;
+
+    DOVIContext dec;    /* context the incoming RPU is parsed with */
+    DOVIContext enc;    /* context the outgoing RPU is generated with */
+
+    /* User options, see dovi_rpu_options[]. */
+    int strip;          /* boolean: remove the metadata instead of rewriting it */
+    int compression;    /* FF_DOVI_COMPRESS_* flags applied to non-keyframe packets */
+} DoviRpuContext;
+
+/**
+ * Parse one RPU with the decoder context and re-generate it with the encoder
+ * context.
+ *
+ * @param bsf       filter context; its priv_data is a DoviRpuContext
+ * @param pkt       packet the RPU belongs to, may be NULL; the configured
+ *                  compression flags are only added when a packet is given
+ *                  and it is not flagged as a keyframe
+ * @param flags     base flags passed on to ff_dovi_rpu_generate()
+ * @param rpu       input RPU payload
+ * @param rpu_size  size of the input payload in bytes
+ * @param out_rpu   receives the generated buffer, or NULL when the decoder
+ *                  reported that there is no metadata
+ * @param out_size  receives the size of *out_rpu (0 when there is no metadata)
+ * @return 0 when there is no metadata, a negative error code on failure,
+ *         otherwise the return value of ff_dovi_rpu_generate()
+ */
+static int update_rpu(AVBSFContext *bsf, const AVPacket *pkt, int flags,
+                      const uint8_t *rpu, size_t rpu_size,
+                      uint8_t **out_rpu, int *out_size)
+{
+    DoviRpuContext *ctx = bsf->priv_data;
+    AVDOVIMetadata *md = NULL;
+    int not_keyframe;
+    int err;
+
+    err = ff_dovi_rpu_parse(&ctx->dec, rpu, rpu_size, 0);
+    if (err < 0)
+        goto fail_dec;
+
+    err = ff_dovi_get_metadata(&ctx->dec, &md);
+    if (err < 0)
+        goto fail_dec;
+    if (!err) {
+        /* no metadata */
+        *out_rpu  = NULL;
+        *out_size = 0;
+        return 0;
+    }
+
+    /* Compression is only requested for packets known not to be keyframes. */
+    not_keyframe = pkt && !(pkt->flags & AV_PKT_FLAG_KEY);
+    if (not_keyframe)
+        flags |= ctx->compression;
+
+    err = ff_dovi_rpu_generate(&ctx->enc, md, flags, out_rpu, out_size);
+    av_free(md);
+    if (err < 0)
+        ff_dovi_ctx_flush(&ctx->enc);
+
+    return err;
+
+fail_dec:
+    /* Any decoder-side failure flushes the decoder context. */
+    ff_dovi_ctx_flush(&ctx->dec);
+    return err;
+}
+
+/**
+ * Rewrite (or strip) the Dolby Vision RPU of an HEVC access unit.
+ *
+ * Only the last unit of the fragment is considered, and only if its type is
+ * HEVC_NAL_UNSPEC62; anything else is passed through untouched.
+ *
+ * With the strip option set the unit is deleted. Otherwise the RPU is run
+ * through update_rpu() and the result is stored after the first three bytes
+ * of the unit (NAL unit header + NAL prefix), in place when it fits into a
+ * writable buffer and in a freshly allocated buffer otherwise.
+ *
+ * @param bsf  filter context; its priv_data is a DoviRpuContext
+ * @param pkt  packet being filtered, forwarded to update_rpu()
+ * @param au   access unit to update
+ * @return 0 on success, a negative error code on failure
+ */
+static int dovi_rpu_update_fragment_hevc(AVBSFContext *bsf, AVPacket *pkt,
+                                         CodedBitstreamFragment *au)
+{
+    DoviRpuContext *s = bsf->priv_data;
+    CodedBitstreamUnit *nal = au->nb_units ? &au->units[au->nb_units - 1] : NULL;
+    uint8_t *rpu = NULL;
+    int rpu_size = 0, ret;
+
+    if (!nal || nal->type != HEVC_NAL_UNSPEC62)
+        return 0;
+
+    if (s->strip) {
+        ff_cbs_delete_unit(au, au->nb_units - 1);
+        return 0;
+    }
+
+    /* The three leading bytes are read and preserved below; a shorter unit
+     * would also make the size passed to update_rpu() wrap around. */
+    if (nal->data_size < 3)
+        return AVERROR_INVALIDDATA;
+
+    ret = update_rpu(bsf, pkt, 0, nal->data + 2, nal->data_size - 2, &rpu, &rpu_size);
+    if (ret < 0)
+        return ret;
+
+    /* update_rpu() succeeds without producing a buffer when the decoder has
+     * no metadata; leave the unit as it is rather than copying from NULL and
+     * truncating it to its three header bytes. */
+    if (!rpu)
+        return 0;
+
+    /* NAL unit header + NAL prefix */
+    if (rpu_size + 3 <= nal->data_size && av_buffer_is_writable(nal->data_ref)) {
+        memcpy(nal->data + 3, rpu, rpu_size);
+        av_free(rpu);
+        nal->data_size = rpu_size + 3;
+    } else {
+        AVBufferRef *ref = av_buffer_alloc(rpu_size + 3);
+        if (!ref) {
+            av_free(rpu);
+            return AVERROR(ENOMEM);
+        }
+
+        /* Copy the header out of the old buffer before it is released. */
+        memcpy(ref->data, nal->data, 3);
+        memcpy(ref->data + 3, rpu, rpu_size);
+        av_buffer_unref(&nal->data_ref);
+        av_free(rpu);
+        nal->data = ref->data;
+        nal->data_size = rpu_size + 3;
+        nal->data_ref = ref;
+        nal->data_bit_padding = 0;
+    }
+
+    return 0;
+}
+
+/**
+ * Rewrite (or strip) the Dolby Vision RPU of an AV1 temporal unit.
+ *
+ * The fragment is searched for the first metadata OBU of type ITU-T T.35
+ * with the US country code whose payload starts with the Dolby provider code
+ * followed by the provider oriented code 0x800. Only that first match is
+ * processed (there should be only one RPU per packet).
+ *
+ * With the strip option set the matching OBU is deleted. Otherwise its
+ * payload (minus the six bytes of provider codes) is run through
+ * update_rpu() with FF_DOVI_WRAP_T35 and the OBU payload is replaced by the
+ * result, skipping the leading country code byte of the generated buffer.
+ *
+ * @param bsf   filter context; its priv_data is a DoviRpuContext
+ * @param pkt   packet being filtered, forwarded to update_rpu()
+ * @param frag  temporal unit to update
+ * @return 0 on success, a negative error code on failure
+ */
+static int dovi_rpu_update_fragment_av1(AVBSFContext *bsf, AVPacket *pkt,
+                                        CodedBitstreamFragment *frag)
+{
+    DoviRpuContext *s = bsf->priv_data;
+    int provider_code, provider_oriented_code, ret;
+
+    for (int i = 0; i < frag->nb_units; i++) {
+        AV1RawOBU *obu = frag->units[i].content;
+        AV1RawMetadataITUTT35 *t35;
+        AVBufferRef *ref;
+        uint8_t *rpu = NULL;
+        int rpu_size = 0;
+
+        /* Check the unit type and content before looking inside the OBU. */
+        if (frag->units[i].type != AV1_OBU_METADATA || !obu ||
+            obu->obu.metadata.metadata_type != AV1_METADATA_TYPE_ITUT_T35)
+            continue;
+
+        t35 = &obu->obu.metadata.metadata.itut_t35;
+        if (t35->itu_t_t35_country_code != ITU_T_T35_COUNTRY_CODE_US ||
+            t35->payload_size < 6)
+            continue;
+
+        provider_code = AV_RB16(t35->payload);
+        provider_oriented_code = AV_RB32(t35->payload + 2);
+        if (provider_code != ITU_T_T35_PROVIDER_CODE_DOLBY ||
+            provider_oriented_code != 0x800)
+            continue;
+
+        if (s->strip) {
+            ff_cbs_delete_unit(frag, i);
+            return 0;
+        }
+
+        ret = update_rpu(bsf, pkt, FF_DOVI_WRAP_T35,
+                         t35->payload + 6, t35->payload_size - 6,
+                         &rpu, &rpu_size);
+        if (ret < 0)
+            return ret;
+
+        /* update_rpu() succeeds without producing a buffer when the decoder
+         * has no metadata. Keep the OBU as it is in that case: wrapping a
+         * NULL buffer would leave the payload pointing at NULL + 1 with a
+         * size of -1. */
+        if (!rpu || rpu_size < 1) {
+            av_free(rpu);
+            return 0;
+        }
+
+        ref = av_buffer_create(rpu, rpu_size, av_buffer_default_free, NULL, 0);
+        if (!ref) {
+            av_free(rpu);
+            return AVERROR(ENOMEM);
+        }
+
+        av_buffer_unref(&t35->payload_ref);
+        t35->payload_ref = ref;
+        t35->payload = rpu + 1; /* skip country code */
+        t35->payload_size = rpu_size - 1;
+        break; /* should be only one RPU per packet */
+    }
+
+    return 0;
+}
+
+/* Description passed to ff_cbs_bsf_generic_init() for HEVC input. */
+static const CBSBSFType dovi_rpu_hevc_type = {
+    .update_fragment = dovi_rpu_update_fragment_hevc,
+    .unit_name       = "NAL unit",
+    .fragment_name   = "access unit",
+    .codec_id        = AV_CODEC_ID_HEVC,
+};
+
+/* Description passed to ff_cbs_bsf_generic_init() for AV1 input. */
+static const CBSBSFType dovi_rpu_av1_type = {
+    .update_fragment = dovi_rpu_update_fragment_av1,
+    .unit_name       = "OBU",
+    .fragment_name   = "temporal unit",
+    .codec_id        = AV_CODEC_ID_AV1,
+};
+
+/**
+ * Initialise the filter.
+ *
+ * In strip mode the Dolby Vision configuration record is removed from the
+ * output parameters' coded side data. Otherwise the record is required on
+ * the input parameters and is copied into both the decoder and the encoder
+ * context. Afterwards the generic CBS BSF initialisation is run with the
+ * type description matching the input codec.
+ *
+ * @return the result of ff_cbs_bsf_generic_init(), AVERROR(EINVAL) when the
+ *         configuration record is missing, or AVERROR_BUG for a codec other
+ *         than HEVC and AV1
+ */
+static int dovi_rpu_init(AVBSFContext *bsf)
+{
+    DoviRpuContext *ctx = bsf->priv_data;
+    const CBSBSFType *type;
+
+    if (!ctx->strip) {
+        const AVPacketSideData *cfg_sd =
+            av_packet_side_data_get(bsf->par_in->coded_side_data,
+                                    bsf->par_in->nb_coded_side_data,
+                                    AV_PKT_DATA_DOVI_CONF);
+        if (!cfg_sd) {
+            av_log(bsf, AV_LOG_ERROR, "No Dolby Vision configuration record found?\n");
+            return AVERROR(EINVAL);
+        }
+
+        ctx->enc.logctx = bsf;
+        ctx->dec.logctx = bsf;
+        ctx->enc.cfg    = *(AVDOVIDecoderConfigurationRecord *) cfg_sd->data;
+        ctx->dec.cfg    = ctx->enc.cfg;
+    } else {
+        av_packet_side_data_remove(bsf->par_out->coded_side_data,
+                                   &bsf->par_out->nb_coded_side_data,
+                                   AV_PKT_DATA_DOVI_CONF);
+    }
+
+    /* The codec is only looked at after the side data has been handled. */
+    if (bsf->par_in->codec_id == AV_CODEC_ID_HEVC)
+        type = &dovi_rpu_hevc_type;
+    else if (bsf->par_in->codec_id == AV_CODEC_ID_AV1)
+        type = &dovi_rpu_av1_type;
+    else
+        return AVERROR_BUG;
+
+    return ff_cbs_bsf_generic_init(bsf, type);
+}
+
+/**
+ * Release both Dolby Vision contexts, then run the generic CBS BSF teardown.
+ */
+static void dovi_rpu_close(AVBSFContext *bsf)
+{
+    DoviRpuContext *ctx = bsf->priv_data;
+    DOVIContext *const dovi[] = { &ctx->dec, &ctx->enc };
+
+    for (size_t i = 0; i < sizeof(dovi) / sizeof(dovi[0]); i++)
+        ff_dovi_ctx_unref(dovi[i]);
+
+    ff_cbs_bsf_generic_close(bsf);
+}
+
+#define OFFSET(x) offsetof(DoviRpuContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+/* User options: "strip" is a boolean, "compression" a flag set whose named
+ * values are the constants sharing the "compression" unit. */
+static const AVOption dovi_rpu_options[] = {
+    { "strip", "Strip Dolby Vision metadata",
+      OFFSET(strip), AV_OPT_TYPE_BOOL,
+      { .i64 = 0 }, 0, 1, FLAGS },
+    { "compression", "DV metadata compression mode",
+      OFFSET(compression), AV_OPT_TYPE_FLAGS,
+      { .i64 = FF_DOVI_COMPRESS_ALL }, 0, FF_DOVI_COMPRESS_ALL, FLAGS,
+      .unit = "compression" },
+        { "none", "Don't compress metadata",
+          0, AV_OPT_TYPE_CONST,
+          { .i64 = 0 }, 0, 0, FLAGS,
+          .unit = "compression" },
+        { "vdr", "Compress VDR metadata",
+          0, AV_OPT_TYPE_CONST,
+          { .i64 = FF_DOVI_COMPRESS_VDR }, 0, 0, FLAGS,
+          .unit = "compression" },
+        { "all", "Compress all metadata",
+          0, AV_OPT_TYPE_CONST,
+          { .i64 = FF_DOVI_COMPRESS_ALL }, 0, 0, FLAGS,
+          .unit = "compression" },
+    { NULL }
+};
+
+/* AVClass exposing dovi_rpu_options[] for the filter's private context. */
+static const AVClass dovi_rpu_class = {
+    .version    = LIBAVUTIL_VERSION_INT,
+    .option     = dovi_rpu_options,
+    .item_name  = av_default_item_name,
+    .class_name = "dovi_rpu_bsf",
+};
+
+/* Codecs the filter accepts; the list ends with AV_CODEC_ID_NONE. */
+static const enum AVCodecID dovi_rpu_codec_ids[] = {
+    AV_CODEC_ID_HEVC,
+    AV_CODEC_ID_AV1,
+    AV_CODEC_ID_NONE,
+};
+
+/* Filter definition; packets go through the generic CBS BSF filter callback. */
+const FFBitStreamFilter ff_dovi_rpu_bsf = {
+    .p.name         = "dovi_rpu",
+    .p.priv_class   = &dovi_rpu_class,
+    .p.codec_ids    = dovi_rpu_codec_ids,
+    .priv_data_size = sizeof(DoviRpuContext),
+    .init           = dovi_rpu_init,
+    .filter         = ff_cbs_bsf_generic_filter,
+    .close          = dovi_rpu_close,
+};