[FFmpeg-devel] avcodec/libvpxenc: add VP8 temporal scalability configuration options

Submitted by Ard Oerlemans on Nov. 8, 2018, 8:44 p.m.

Details

Message ID 20181108204442.133420-1-ardoerlemans@google.com
State New
Headers show

Commit Message

Ard Oerlemans Nov. 8, 2018, 8:44 p.m.
This commit adds configuration options to libvpxenc.c that can be used to
enable VP8 temporal scalability. It also adds a way to programmatically set the
per-frame encoding flags which can be used to control usage and updates of
reference frames while encoding with temporal scalability enabled.
---
 doc/encoders.texi      | 28 ++++++++++++++
 libavcodec/libvpxenc.c | 85 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)

Comments

Carl Eugen Hoyos Nov. 9, 2018, 12:22 a.m.
2018-11-08 21:44 GMT+01:00, Ard Oerlemans
<ardoerlemans-at-google.com@ffmpeg.org>:

> +#if CONFIG_LIBVPX_VP8_ENCODER
> +        if (frame->metadata) {

if (CONFIG_LIBVPX_VP8_ENCODER && frame->metadata)
may be more in line with FFmpeg's code style.

Carl Eugen

Patch hide | download patch | download mbox

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 899faac49b..6ecd572ea3 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -1654,6 +1654,34 @@  Set number of frames to look ahead for frametype and ratecontrol.
 @item error-resilient
 Enable error resiliency features.
 
+@item VP8-specific options
+@table @option
+@item ts-parameters
+Sets the temporal scalability configuration using a :-separated list of
+key=value pairs. For example, to specify temporal scalability parameters
+with @code{ffmpeg}:
+@example
+ffmpeg -i INPUT -c:v libvpx -ts-parameters ts_number_layers=3:\
+ts_target_bitrate=250000,500000,1000000:ts_rate_decimator=4,2,1:\
+ts_periodicity=4:ts_layer_id=0,2,1,2 OUTPUT
+@end example
+Below is a brief explanation of each of the parameters; please
+refer to @code{struct vpx_codec_enc_cfg} in @code{vpx/vpx_encoder.h} for more
+details.
+@table @option
+@item ts_number_layers
+Number of temporal coding layers.
+@item ts_target_bitrate
+Target bitrate for each temporal layer.
+@item ts_rate_decimator
+Frame rate decimation factor for each temporal layer.
+@item ts_periodicity
+Length of the sequence defining frame temporal layer membership.
+@item ts_layer_id
+Template defining the membership of frames to temporal layers.
+@end table
+@end table
+
 @item VP9-specific options
 @table @option
 @item lossless
diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 09f7a88452..206f17f4cd 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -33,6 +33,7 @@ 
 #include "libavutil/avassert.h"
 #include "libvpx.h"
 #include "profiles.h"
+#include "libavutil/avstring.h"
 #include "libavutil/base64.h"
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
@@ -98,6 +99,8 @@  typedef struct VPxEncoderContext {
     int rc_undershoot_pct;
     int rc_overshoot_pct;
 
+    char *vp8_ts_parameters;
+
     // VP9-only
     int lossless;
     int tile_columns;
@@ -165,6 +168,7 @@  static av_cold void dump_enc_cfg(AVCodecContext *avctx,
 {
     int width = -30;
     int level = AV_LOG_DEBUG;
+    int i;
 
     av_log(avctx, level, "vpx_codec_enc_cfg\n");
     av_log(avctx, level, "generic settings\n"
@@ -204,6 +208,25 @@  static av_cold void dump_enc_cfg(AVCodecContext *avctx,
            "  %*s%u\n  %*s%u\n",
            width, "rc_undershoot_pct:", cfg->rc_undershoot_pct,
            width, "rc_overshoot_pct:",  cfg->rc_overshoot_pct);
+    av_log(avctx, level, "temporal layering settings\n"
+           "  %*s%u\n", width, "ts_number_layers:", cfg->ts_number_layers);
+    av_log(avctx, level,
+           "\n  %*s", width, "ts_target_bitrate:");
+    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+        av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
+    av_log(avctx, level, "\n");
+    av_log(avctx, level,
+           "\n  %*s", width, "ts_rate_decimator:");
+    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+        av_log(avctx, level, "%u ", cfg->ts_rate_decimator[i]);
+    av_log(avctx, level, "\n");
+    av_log(avctx, level,
+           "\n  %*s%u\n", width, "ts_periodicity:", cfg->ts_periodicity);
+    av_log(avctx, level,
+           "\n  %*s", width, "ts_layer_id:");
+    for (i = 0; i < VPX_TS_MAX_PERIODICITY; i++)
+        av_log(avctx, level, "%u ", cfg->ts_layer_id[i]);
+    av_log(avctx, level, "\n");
     av_log(avctx, level, "decoder buffer model\n"
             "  %*s%u\n  %*s%u\n  %*s%u\n",
             width, "rc_buf_sz:",         cfg->rc_buf_sz,
@@ -321,6 +344,41 @@  static av_cold int vpx_free(AVCodecContext *avctx)
     return 0;
 }
 
+#if CONFIG_LIBVPX_VP8_ENCODER
+/* Read up to max_entries base-10 numbers from value into dest; entries are separated by ',' and/or whitespace. */
+static void vp8_ts_parse_int_array(int* dest, char *value, size_t value_len, int max_entries)
+{
+    int n;
+    char *const limit = value + value_len;
+    for (n = 0; n < max_entries && value < limit; n++) {
+        dest[n] = strtoul(value, &value, 10);
+        while (value < limit && (av_isspace(*value) || *value == ','))
+            value++;
+    }
+}
+
+/* Apply one "key=value" pair from ts-parameters to enccfg; returns 0 on success, -1 for an empty value or an unknown key. */
+static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char *key, char *value)
+{
+    size_t value_len = strlen(value);
+    if (!value_len)
+        return -1;
+    if (!strcmp(key, "ts_number_layers"))
+        enccfg->ts_number_layers = strtoul(value, &value, 10);
+    else if (!strcmp(key, "ts_target_bitrate"))
+        vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
+    else if (!strcmp(key, "ts_rate_decimator"))
+        vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len, VPX_TS_MAX_LAYERS);
+    else if (!strcmp(key, "ts_periodicity"))
+        enccfg->ts_periodicity = strtoul(value, &value, 10);
+    else if (!strcmp(key, "ts_layer_id"))
+        vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len, VPX_TS_MAX_PERIODICITY);
+    else
+        return -1; /* unknown key: signal failure rather than silently ignoring a misspelled parameter */
+    return 0;
+}
+#endif
+
 #if CONFIG_LIBVPX_VP9_ENCODER
 static int set_pix_fmt(AVCodecContext *avctx, vpx_codec_caps_t codec_caps,
                        struct vpx_codec_enc_cfg *enccfg, vpx_codec_flags_t *flags,
@@ -635,6 +693,24 @@  FF_ENABLE_DEPRECATION_WARNINGS
 
     enccfg.g_error_resilient = ctx->error_resilient || ctx->flags & VP8F_ERROR_RESILIENT;
 
+#if CONFIG_LIBVPX_VP8_ENCODER
+    if (avctx->codec_id == AV_CODEC_ID_VP8 && ctx->vp8_ts_parameters) {
+        AVDictionary *dict    = NULL;
+        AVDictionaryEntry* en = NULL;
+
+        if (!av_dict_parse_string(&dict, ctx->vp8_ts_parameters, "=", ":", 0)) {
+            while ((en = av_dict_get(dict, "", en, AV_DICT_IGNORE_SUFFIX))) {
+                if (vp8_ts_param_parse(&enccfg, en->key, en->value) < 0)
+                    av_log(avctx, AV_LOG_WARNING,
+                           "Error parsing option '%s = %s'.\n",
+                           en->key, en->value);
+            }
+
+            av_dict_free(&dict);
+        }
+    }
+#endif
+
     dump_enc_cfg(avctx, &enccfg);
     /* Construct Encoder Context */
     res = vpx_codec_enc_init(&ctx->encoder, iface, &enccfg, flags);
@@ -1021,6 +1097,14 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
 #endif
         if (frame->pict_type == AV_PICTURE_TYPE_I)
             flags |= VPX_EFLAG_FORCE_KF;
+#if CONFIG_LIBVPX_VP8_ENCODER
+        if (frame->metadata) {
+            AVDictionaryEntry* en = av_dict_get(frame->metadata, "vp8-flags", NULL, 0);
+            if (en) {
+                flags |= strtoul(en->value, NULL, 10);
+            }
+        }
+#endif
     }
 
     res = vpx_codec_encode(&ctx->encoder, rawimg, timestamp,
@@ -1113,6 +1197,7 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
 static const AVOption vp8_options[] = {
     COMMON_OPTIONS
     { "cpu-used",        "Quality/Speed ratio modifier",                OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
+    { "ts-parameters",   "Temporal scaling configuration using a :-separated list of key=value parameters", OFFSET(vp8_ts_parameters), AV_OPT_TYPE_STRING, {.str=NULL},  0,  0, VE},
     LEGACY_OPTIONS
     { NULL }
 };