diff mbox series

[FFmpeg-devel,v3,1/3] avcodec/libvpxenc: add VP9 temporal scalability encoding option

Message ID 20200108230006.169044-1-wonkap@google.com
State Superseded
Headers show
Series [FFmpeg-devel,v3,1/3] avcodec/libvpxenc: add VP9 temporal scalability encoding option
Related show

Checks

Context Check Description
andriy/ffmpeg-patchwork pending
andriy/ffmpeg-patchwork success Applied patch
andriy/ffmpeg-patchwork success Configure finished
andriy/ffmpeg-patchwork success Make finished
andriy/ffmpeg-patchwork success Make fate finished

Commit Message

Wonkap Jang Jan. 8, 2020, 11 p.m. UTC
This commit reuses the VP8 temporal scalability configuration options
to enable temporal scalability for VP9. It also adds a way to select
one of three preset temporal structures (refer to the documentation
for more detail) that can be used in offline encoding.
---
 libavcodec/libvpxenc.c | 251 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 228 insertions(+), 23 deletions(-)

Comments

Wonkap Jang Jan. 8, 2020, 11:04 p.m. UTC | #1
On Wed, Jan 8, 2020 at 3:00 PM Wonkap Jang <wonkap@google.com> wrote:

> This commit reuses the VP8 temporal scalability configuration options
> to enable temporal scalability for VP9. It also adds a way to select
> one of three preset temporal structures (refer to the documentation
> for more detail) that can be used in offline encoding.
> ---
>  libavcodec/libvpxenc.c | 251 +++++++++++++++++++++++++++++++++++++----
>  1 file changed, 228 insertions(+), 23 deletions(-)
>
> diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
> index 0b8a070304..14cc1e7158 100644
> --- a/libavcodec/libvpxenc.c
> +++ b/libavcodec/libvpxenc.c
> @@ -100,7 +100,9 @@ typedef struct VPxEncoderContext {
>      int rc_undershoot_pct;
>      int rc_overshoot_pct;
>
> -    AVDictionary *vp8_ts_parameters;
> +    AVDictionary *vpx_ts_parameters;
> +    int *ts_layer_flags;
> +    int current_temporal_idx;
>
>      // VP9-only
>      int lossless;
> @@ -137,6 +139,7 @@ static const char *const ctlidstr[] = {
>      [VP8E_SET_CQ_LEVEL]          = "VP8E_SET_CQ_LEVEL",
>      [VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
>      [VP8E_SET_SHARPNESS]               = "VP8E_SET_SHARPNESS",
> +    [VP8E_SET_TEMPORAL_LAYER_ID]       = "VP8E_SET_TEMPORAL_LAYER_ID",
>  #if CONFIG_LIBVPX_VP9_ENCODER
>      [VP9E_SET_LOSSLESS]                = "VP9E_SET_LOSSLESS",
>      [VP9E_SET_TILE_COLUMNS]            = "VP9E_SET_TILE_COLUMNS",
> @@ -144,6 +147,11 @@ static const char *const ctlidstr[] = {
>      [VP9E_SET_FRAME_PARALLEL_DECODING] =
> "VP9E_SET_FRAME_PARALLEL_DECODING",
>      [VP9E_SET_AQ_MODE]                 = "VP9E_SET_AQ_MODE",
>      [VP9E_SET_COLOR_SPACE]             = "VP9E_SET_COLOR_SPACE",
> +    [VP9E_SET_SVC_LAYER_ID]            = "VP9E_SET_SVC_LAYER_ID",
> +#if VPX_ENCODER_ABI_VERSION >= 12
> +    [VP9E_SET_SVC_PARAMETERS]          = "VP9E_SET_SVC_PARAMETERS",
> +#endif
> +    [VP9E_SET_SVC]                     = "VP9E_SET_SVC",
>  #if VPX_ENCODER_ABI_VERSION >= 11
>      [VP9E_SET_COLOR_RANGE]             = "VP9E_SET_COLOR_RANGE",
>  #endif
> @@ -223,8 +231,16 @@ static av_cold void dump_enc_cfg(AVCodecContext
> *avctx,
>             "  %*s%u\n", width, "ts_number_layers:",
> cfg->ts_number_layers);
>      av_log(avctx, level,
>             "\n  %*s", width, "ts_target_bitrate:");
> -    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> -        av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> +    if (avctx->codec_id == AV_CODEC_ID_VP8) {
> +        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> +            av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> +    }
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> +    if (avctx->codec_id == AV_CODEC_ID_VP9) {
> +        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> +            av_log(avctx, level, "%u ", cfg->layer_target_bitrate[i]);
> +    }
> +#endif
>      av_log(avctx, level, "\n");
>      av_log(avctx, level,
>             "\n  %*s", width, "ts_rate_decimator:");
> @@ -346,6 +362,8 @@ static av_cold int vpx_free(AVCodecContext *avctx)
>      }
>  #endif
>
> +    av_freep(&ctx->ts_layer_flags);
> +
>      vpx_codec_destroy(&ctx->encoder);
>      if (ctx->is_alpha) {
>          vpx_codec_destroy(&ctx->encoder_alpha);
> @@ -370,23 +388,154 @@ static void vp8_ts_parse_int_array(int *dest, char
> *value, size_t value_len, int
>      }
>  }
>
> -static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char
> *key, char *value)
> +static void set_temporal_layer_pattern(int layering_mode,
> +                                       vpx_codec_enc_cfg_t *cfg,
> +                                       int *layer_flags,
> +                                       int *flag_periodicity)
> +{
> +    switch (layering_mode) {
> +    case 2: {
> +        /**
> +         * 2-layers, 2-frame period.
> +         */
> +        int ids[2] = { 0, 1 };
> +        cfg->ts_periodicity = 2;
> +        *flag_periodicity = 2;
> +        cfg->ts_number_layers = 2;
> +        cfg->ts_rate_decimator[0] = 2;
> +        cfg->ts_rate_decimator[1] = 1;
> +        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> +        layer_flags[0] =
> +             VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> +             VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> +
> +        layer_flags[1] =
> +            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF |
> +            VP8_EFLAG_NO_UPD_LAST |
> +            VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF;
> +        break;
> +    }
> +    case 3: {
> +        /**
> +         * 3-layers structure with one reference frame.
> +         *  This works same as temporal_layering_mode 3.
> +         *
> +         * 3-layers, 4-frame period.
> +         */
> +        int ids[4] = { 0, 2, 1, 2 };
> +        cfg->ts_periodicity = 4;
> +        *flag_periodicity = 4;
> +        cfg->ts_number_layers = 3;
> +        cfg->ts_rate_decimator[0] = 4;
> +        cfg->ts_rate_decimator[1] = 2;
> +        cfg->ts_rate_decimator[2] = 1;
> +        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> +        /**
> +         * 0=L, 1=GF, 2=ARF,
> +         * Intra-layer prediction disabled.
> +         */
> +        layer_flags[0] =
> +            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> +            VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> +        layer_flags[2] =
> +            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> +            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
> +        layer_flags[1] =
> +            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> +            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> +            VP8_EFLAG_NO_UPD_ARF;
> +        layer_flags[3] =
> +            VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
> +            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> +            VP8_EFLAG_NO_UPD_ARF;
> +        break;
> +    }
> +    case 4: {
> +        /**
> +         * 3-layers structure.
> +         * added dependency between the two TL2 frames (on top of case 3).
> +         * 3-layers, 4-frame period.
> +         */
> +        int ids[4] = { 0, 2, 1, 2 };
> +        cfg->ts_periodicity = 4;
> +        *flag_periodicity = 4;
> +        cfg->ts_number_layers = 3;
> +        cfg->ts_rate_decimator[0] = 4;
> +        cfg->ts_rate_decimator[1] = 2;
> +        cfg->ts_rate_decimator[2] = 1;
> +        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> +        /**
> +         * 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
> +         */
> +        layer_flags[0] =
> +            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> +            VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> +        layer_flags[2] =
> +            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> +            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
> +        layer_flags[1] =
> +            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> +            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
> +        layer_flags[3] =
> +            VP8_EFLAG_NO_REF_LAST |
> +            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
> +            VP8_EFLAG_NO_UPD_ARF;
> +        break;
> +    }
> +    default:
> +        /**
> +         * do not change the layer_flags or the flag_periodicity in this
> case;
> +         * it might be that the code is using external flags to be used.
> +         */
> +        break;
> +
> +    }
> +}
> +
> +static int vpx_ts_param_parse(VPxContext *ctx,
> +                              struct vpx_codec_enc_cfg *enccfg,
> +                              char *key, char *value,
> +                              const enum AVCodecID codec_id)
>  {
>      size_t value_len = strlen(value);
> +    int ts_layering_mode = 0;
>
>      if (!value_len)
>          return -1;
>
>      if (!strcmp(key, "ts_number_layers"))
>          enccfg->ts_number_layers = strtoul(value, &value, 10);
> -    else if (!strcmp(key, "ts_target_bitrate"))
> -        vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> -    else if (!strcmp(key, "ts_rate_decimator"))
> -      vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len,
> VPX_TS_MAX_LAYERS);
> -    else if (!strcmp(key, "ts_periodicity"))
> +    else if (!strcmp(key, "ts_target_bitrate")) {
> +        if (codec_id == AV_CODEC_ID_VP8)
> +            vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> +        if (codec_id == AV_CODEC_ID_VP9)
> +            vp8_ts_parse_int_array(enccfg->layer_target_bitrate, value,
> value_len, VPX_TS_MAX_LAYERS);
> +#endif
> +    } else if (!strcmp(key, "ts_rate_decimator")) {
> +        vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value,
> value_len, VPX_TS_MAX_LAYERS);
> +    } else if (!strcmp(key, "ts_periodicity")) {
>          enccfg->ts_periodicity = strtoul(value, &value, 10);
> -    else if (!strcmp(key, "ts_layer_id"))
> +    } else if (!strcmp(key, "ts_layer_id")) {
>          vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len,
> VPX_TS_MAX_PERIODICITY);
> +    } else if (!strcmp(key, "ts_layering_mode")) {
> +        /* option for pre-defined temporal structures in function
> set_temporal_layer_pattern. */
> +        ts_layering_mode = strtoul(value, &value, 4);
> +    }
> +
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> +    enccfg->temporal_layering_mode = 1; // only bypass mode is being
> supported for now.
> +    enccfg->ss_number_layers = 1; // making sure the spatial scalability
> is off. Will support this in future.
> +#endif
> +    if (ts_layering_mode) {
> +      // make sure the ts_layering_mode comes at the end of the
> ts_parameter string to ensure that
> +      // correct configuration is done.
> +      ctx->ts_layer_flags = av_malloc(sizeof(int) *
> VPX_TS_MAX_PERIODICITY);
> +      set_temporal_layer_pattern(ts_layering_mode, enccfg,
> ctx->ts_layer_flags, &enccfg->ts_periodicity);
> +    }
>
>      return 0;
>  }
> @@ -590,7 +739,9 @@ static av_cold int vpx_init(AVCodecContext *avctx,
>      vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
>  #if CONFIG_LIBVPX_VP9_ENCODER
>      vpx_codec_caps_t codec_caps = vpx_codec_get_caps(iface);
> +    vpx_svc_extra_cfg_t svc_params;
>  #endif
> +    AVDictionaryEntry* en = NULL;
>
>      av_log(avctx, AV_LOG_INFO, "%s\n", vpx_codec_version_str());
>      av_log(avctx, AV_LOG_VERBOSE, "%s\n", vpx_codec_build_config());
> @@ -611,8 +762,8 @@ static av_cold int vpx_init(AVCodecContext *avctx,
>      }
>  #endif
>
> -    if(!avctx->bit_rate)
> -        if(avctx->rc_max_rate || avctx->rc_buffer_size ||
> avctx->rc_initial_buffer_occupancy) {
> +    if (!avctx->bit_rate)
> +        if (avctx->rc_max_rate || avctx->rc_buffer_size ||
> avctx->rc_initial_buffer_occupancy) {
>              av_log( avctx, AV_LOG_ERROR, "Rate control parameters set
> without a bitrate\n");
>              return AVERROR(EINVAL);
>          }
> @@ -648,6 +799,9 @@ static av_cold int vpx_init(AVCodecContext *avctx,
>      if (avctx->bit_rate) {
>          enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1,
> 1000,
>                                                    AV_ROUND_NEAR_INF);
> +#if CONFIG_LIBVPX_VP9_ENCODER
> +        enccfg.ss_target_bitrate[0] = enccfg.rc_target_bitrate;
> +#endif
>      } else {
>          // Set bitrate to default value. Also sets CRF to default if
> needed.
>          set_vpx_defaults(avctx, &enccfg);
> @@ -757,14 +911,11 @@ FF_ENABLE_DEPRECATION_WARNINGS
>
>      enccfg.g_error_resilient = ctx->error_resilient || ctx->flags &
> VP8F_ERROR_RESILIENT;
>
> -    if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8) {
> -        AVDictionaryEntry* en = NULL;
> -        while ((en = av_dict_get(ctx->vp8_ts_parameters, "", en,
> AV_DICT_IGNORE_SUFFIX))) {
> -            if (vp8_ts_param_parse(&enccfg, en->key, en->value) < 0)
> -                av_log(avctx, AV_LOG_WARNING,
> -                       "Error parsing option '%s = %s'.\n",
> -                       en->key, en->value);
> -        }
> +    while ((en = av_dict_get(ctx->vpx_ts_parameters, "", en,
> AV_DICT_IGNORE_SUFFIX))) {
> +        if (vpx_ts_param_parse(ctx, &enccfg, en->key, en->value,
> avctx->codec_id) < 0)
> +            av_log(avctx, AV_LOG_WARNING,
> +                   "Error parsing option '%s = %s'.\n",
> +                   en->key, en->value);
>      }
>
>      dump_enc_cfg(avctx, &enccfg);
> @@ -774,7 +925,21 @@ FF_ENABLE_DEPRECATION_WARNINGS
>          log_encoder_error(avctx, "Failed to initialize encoder");
>          return AVERROR(EINVAL);
>      }
> -
> +#if CONFIG_LIBVPX_VP9_ENCODER
> +    if (avctx->codec_id == AV_CODEC_ID_VP9 && enccfg.ts_number_layers >
> 1) {
> +        memset(&svc_params, 0, sizeof(svc_params));
> +        for (int i = 0; i < enccfg.ts_number_layers; ++i) {
> +            svc_params.max_quantizers[i] = enccfg.rc_max_quantizer;
> +            svc_params.min_quantizers[i] = enccfg.rc_min_quantizer;
> +        }
> +        svc_params.scaling_factor_num[0] = enccfg.g_h;
> +        svc_params.scaling_factor_den[0] = enccfg.g_h;
> +#if VPX_ENCODER_ABI_VERSION >= 12
> +        codecctl_int(avctx, VP9E_SET_SVC, 1);
> +        codecctl_intp(avctx, VP9E_SET_SVC_PARAMETERS, (int *)
> &svc_params);
> +#endif
> +    }
> +#endif
>      if (ctx->is_alpha) {
>          enccfg_alpha = enccfg;
>          res = vpx_codec_enc_init(&ctx->encoder_alpha, iface,
> &enccfg_alpha, flags);
> @@ -1321,6 +1486,9 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
>      int64_t timestamp = 0;
>      int res, coded_size;
>      vpx_enc_frame_flags_t flags = 0;
> +    const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
> +    vpx_svc_layer_id_t layer_id;
> +    int layer_id_valid = 0;
>
>      if (frame) {
>          const AVFrameSideData *sd = av_frame_get_side_data(frame,
> AV_FRAME_DATA_REGIONS_OF_INTEREST);
> @@ -1368,6 +1536,42 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
>          }
>      }
>
> +    // this is for encoding with preset temporal layering patterns
> defined in
> +    // set_temporal_layer_pattern function.
> +    if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
> +        if (flags & VPX_EFLAG_FORCE_KF) {
> +            // keyframe, reset temporal layering.
> +            ctx->current_temporal_idx = 0;
> +            flags = VPX_EFLAG_FORCE_KF;
> +        } else {
> +            flags = 0;
> +        }
> +
> +        /* get the flags from the temporal layer configuration. */
> +        flags |= ctx->ts_layer_flags[ctx->current_temporal_idx];
> +
> +        memset(&layer_id, 0, sizeof(layer_id));
> +#if VPX_ENCODER_ABI_VERSION >= 12
> +        layer_id.spatial_layer_id = 0;
> +#endif
> +        layer_id.temporal_layer_id =
> enccfg->ts_layer_id[ctx->current_temporal_idx];
> +#ifdef VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT
> +        layer_id.temporal_layer_id_per_spatial[0] =
> layer_id.temporal_layer_id;
> +#endif
> +        layer_id_valid = 1;
> +    }
> +
> +    if (layer_id_valid) {
> +        if (avctx->codec_id == AV_CODEC_ID_VP8) {
> +            codecctl_int(avctx, VP8E_SET_TEMPORAL_LAYER_ID,
> layer_id.temporal_layer_id);
> +        }
> +#if CONFIG_LIBVPX_VP9_ENCODER && VPX_ENCODER_ABI_VERSION >= 12
> +        else if (avctx->codec_id == AV_CODEC_ID_VP9) {
> +            codecctl_intp(avctx, VP9E_SET_SVC_LAYER_ID, (int *)
> &layer_id);
> +        }
> +#endif
> +    }
> +
>      res = vpx_codec_encode(&ctx->encoder, rawimg, timestamp,
>                             avctx->ticks_per_frame, flags, ctx->deadline);
>      if (res != VPX_CODEC_OK) {
> @@ -1397,6 +1601,8 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
>          }
>          av_base64_encode(avctx->stats_out, b64_size,
> ctx->twopass_stats.buf,
>                           ctx->twopass_stats.sz);
> +    } else if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
> +        ctx->current_temporal_idx = (ctx->current_temporal_idx + 1) %
> enccfg->ts_periodicity;
>      }
>
>      *got_packet = !!coded_size;
> @@ -1435,6 +1641,7 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
>      { "noise-sensitivity", "Noise sensitivity",
> OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE}, \
>      { "undershoot-pct",  "Datarate undershoot (min) target (%)",
> OFFSET(rc_undershoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 100, VE }, \
>      { "overshoot-pct",   "Datarate overshoot (max) target (%)",
> OFFSET(rc_overshoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, \
> +    { "ts-parameters",   "Temporal scaling configuration using a
> :-separated list of key=value parameters", OFFSET(vpx_ts_parameters),
> AV_OPT_TYPE_DICT, {.str=NULL},  0,  0, VE}, \
>
>  #define LEGACY_OPTIONS \
>      {"speed", "", offsetof(VPxContext, cpu_used), AV_OPT_TYPE_INT, {.i64
> = 1}, -16, 16, VE}, \
> @@ -1454,8 +1661,6 @@ static const AVOption vp8_options[] = {
>      { "auto-alt-ref",    "Enable use of alternate reference "
>                           "frames (2-pass only)",
> OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1}, -1,  2, VE},
>      { "cpu-used",        "Quality/Speed ratio modifier",
> OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
> -    { "ts-parameters",   "Temporal scaling configuration using a "
> -                         ":-separated list of key=value parameters",
> OFFSET(vp8_ts_parameters), AV_OPT_TYPE_DICT, {.str=NULL},  0,  0, VE},
>      LEGACY_OPTIONS
>      { NULL }
>  };
> --
> 2.25.0.rc1.283.g88dfdc4193-goog
>
>
Changes from v2:
1. Fixed the versioning problem with older versions of the libvpx library.
2. Changed sizeof to use the variable rather than the type.
3. Merged two patches that depend on each other.
4. Trimmed the documentation patch to include only the related documentation changes.

Thank you,

Wonkap
diff mbox series

Patch

diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 0b8a070304..14cc1e7158 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -100,7 +100,9 @@  typedef struct VPxEncoderContext {
     int rc_undershoot_pct;
     int rc_overshoot_pct;
 
-    AVDictionary *vp8_ts_parameters;
+    AVDictionary *vpx_ts_parameters;
+    int *ts_layer_flags;
+    int current_temporal_idx;
 
     // VP9-only
     int lossless;
@@ -137,6 +139,7 @@  static const char *const ctlidstr[] = {
     [VP8E_SET_CQ_LEVEL]          = "VP8E_SET_CQ_LEVEL",
     [VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
     [VP8E_SET_SHARPNESS]               = "VP8E_SET_SHARPNESS",
+    [VP8E_SET_TEMPORAL_LAYER_ID]       = "VP8E_SET_TEMPORAL_LAYER_ID",
 #if CONFIG_LIBVPX_VP9_ENCODER
     [VP9E_SET_LOSSLESS]                = "VP9E_SET_LOSSLESS",
     [VP9E_SET_TILE_COLUMNS]            = "VP9E_SET_TILE_COLUMNS",
@@ -144,6 +147,11 @@  static const char *const ctlidstr[] = {
     [VP9E_SET_FRAME_PARALLEL_DECODING] = "VP9E_SET_FRAME_PARALLEL_DECODING",
     [VP9E_SET_AQ_MODE]                 = "VP9E_SET_AQ_MODE",
     [VP9E_SET_COLOR_SPACE]             = "VP9E_SET_COLOR_SPACE",
+    [VP9E_SET_SVC_LAYER_ID]            = "VP9E_SET_SVC_LAYER_ID",
+#if VPX_ENCODER_ABI_VERSION >= 12
+    [VP9E_SET_SVC_PARAMETERS]          = "VP9E_SET_SVC_PARAMETERS",
+#endif
+    [VP9E_SET_SVC]                     = "VP9E_SET_SVC",
 #if VPX_ENCODER_ABI_VERSION >= 11
     [VP9E_SET_COLOR_RANGE]             = "VP9E_SET_COLOR_RANGE",
 #endif
@@ -223,8 +231,16 @@  static av_cold void dump_enc_cfg(AVCodecContext *avctx,
            "  %*s%u\n", width, "ts_number_layers:", cfg->ts_number_layers);
     av_log(avctx, level,
            "\n  %*s", width, "ts_target_bitrate:");
-    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
-        av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
+    if (avctx->codec_id == AV_CODEC_ID_VP8) {
+        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+            av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
+    }
+#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
+    if (avctx->codec_id == AV_CODEC_ID_VP9) {
+        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+            av_log(avctx, level, "%u ", cfg->layer_target_bitrate[i]);
+    }
+#endif
     av_log(avctx, level, "\n");
     av_log(avctx, level,
            "\n  %*s", width, "ts_rate_decimator:");
@@ -346,6 +362,8 @@  static av_cold int vpx_free(AVCodecContext *avctx)
     }
 #endif
 
+    av_freep(&ctx->ts_layer_flags);
+
     vpx_codec_destroy(&ctx->encoder);
     if (ctx->is_alpha) {
         vpx_codec_destroy(&ctx->encoder_alpha);
@@ -370,23 +388,154 @@  static void vp8_ts_parse_int_array(int *dest, char *value, size_t value_len, int
     }
 }
 
-static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char *key, char *value)
+static void set_temporal_layer_pattern(int layering_mode,
+                                       vpx_codec_enc_cfg_t *cfg,
+                                       int *layer_flags,
+                                       int *flag_periodicity)
+{
+    switch (layering_mode) {
+    case 2: {
+        /**
+         * 2-layers, 2-frame period.
+         */
+        int ids[2] = { 0, 1 };
+        cfg->ts_periodicity = 2;
+        *flag_periodicity = 2;
+        cfg->ts_number_layers = 2;
+        cfg->ts_rate_decimator[0] = 2;
+        cfg->ts_rate_decimator[1] = 1;
+        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+
+        layer_flags[0] =
+             VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+             VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+
+        layer_flags[1] =
+            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_LAST |
+            VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF;
+        break;
+    }
+    case 3: {
+        /**
+         * 3-layers structure with one reference frame.
+         *  This works same as temporal_layering_mode 3.
+         *
+         * 3-layers, 4-frame period.
+         */
+        int ids[4] = { 0, 2, 1, 2 };
+        cfg->ts_periodicity = 4;
+        *flag_periodicity = 4;
+        cfg->ts_number_layers = 3;
+        cfg->ts_rate_decimator[0] = 4;
+        cfg->ts_rate_decimator[1] = 2;
+        cfg->ts_rate_decimator[2] = 1;
+        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+
+        /**
+         * 0=L, 1=GF, 2=ARF,
+         * Intra-layer prediction disabled.
+         */
+        layer_flags[0] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        layer_flags[2] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+        layer_flags[1] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_ARF;
+        layer_flags[3] =
+            VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_ARF;
+        break;
+    }
+    case 4: {
+        /**
+         * 3-layers structure.
+         * added dependency between the two TL2 frames (on top of case 3).
+         * 3-layers, 4-frame period.
+         */
+        int ids[4] = { 0, 2, 1, 2 };
+        cfg->ts_periodicity = 4;
+        *flag_periodicity = 4;
+        cfg->ts_number_layers = 3;
+        cfg->ts_rate_decimator[0] = 4;
+        cfg->ts_rate_decimator[1] = 2;
+        cfg->ts_rate_decimator[2] = 1;
+        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+
+        /**
+         * 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
+         */
+        layer_flags[0] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        layer_flags[2] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+        layer_flags[1] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+        layer_flags[3] =
+            VP8_EFLAG_NO_REF_LAST |
+            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_ARF;
+        break;
+    }
+    default:
+        /**
+         * do not change the layer_flags or the flag_periodicity in this case;
+         * it might be that the code is using external flags to be used.
+         */
+        break;
+
+    }
+}
+
+static int vpx_ts_param_parse(VPxContext *ctx,
+                              struct vpx_codec_enc_cfg *enccfg,
+                              char *key, char *value,
+                              const enum AVCodecID codec_id)
 {
     size_t value_len = strlen(value);
+    int ts_layering_mode = 0;
 
     if (!value_len)
         return -1;
 
     if (!strcmp(key, "ts_number_layers"))
         enccfg->ts_number_layers = strtoul(value, &value, 10);
-    else if (!strcmp(key, "ts_target_bitrate"))
-        vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
-    else if (!strcmp(key, "ts_rate_decimator"))
-      vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len, VPX_TS_MAX_LAYERS);
-    else if (!strcmp(key, "ts_periodicity"))
+    else if (!strcmp(key, "ts_target_bitrate")) {
+        if (codec_id == AV_CODEC_ID_VP8)
+            vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
+#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
+        if (codec_id == AV_CODEC_ID_VP9)
+            vp8_ts_parse_int_array(enccfg->layer_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
+#endif
+    } else if (!strcmp(key, "ts_rate_decimator")) {
+        vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len, VPX_TS_MAX_LAYERS);
+    } else if (!strcmp(key, "ts_periodicity")) {
         enccfg->ts_periodicity = strtoul(value, &value, 10);
-    else if (!strcmp(key, "ts_layer_id"))
+    } else if (!strcmp(key, "ts_layer_id")) {
         vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len, VPX_TS_MAX_PERIODICITY);
+    } else if (!strcmp(key, "ts_layering_mode")) {
+        /* option for pre-defined temporal structures in function set_temporal_layer_pattern. */
+        ts_layering_mode = strtoul(value, &value, 4);
+    }
+
+#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
+    enccfg->temporal_layering_mode = 1; // only bypass mode is being supported for now.
+    enccfg->ss_number_layers = 1; // making sure the spatial scalability is off. Will support this in future.
+#endif
+    if (ts_layering_mode) {
+      // make sure the ts_layering_mode comes at the end of the ts_parameter string to ensure that
+      // correct configuration is done.
+      ctx->ts_layer_flags = av_malloc(sizeof(int) * VPX_TS_MAX_PERIODICITY);
+      set_temporal_layer_pattern(ts_layering_mode, enccfg, ctx->ts_layer_flags, &enccfg->ts_periodicity);
+    }
 
     return 0;
 }
@@ -590,7 +739,9 @@  static av_cold int vpx_init(AVCodecContext *avctx,
     vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
 #if CONFIG_LIBVPX_VP9_ENCODER
     vpx_codec_caps_t codec_caps = vpx_codec_get_caps(iface);
+    vpx_svc_extra_cfg_t svc_params;
 #endif
+    AVDictionaryEntry* en = NULL;
 
     av_log(avctx, AV_LOG_INFO, "%s\n", vpx_codec_version_str());
     av_log(avctx, AV_LOG_VERBOSE, "%s\n", vpx_codec_build_config());
@@ -611,8 +762,8 @@  static av_cold int vpx_init(AVCodecContext *avctx,
     }
 #endif
 
-    if(!avctx->bit_rate)
-        if(avctx->rc_max_rate || avctx->rc_buffer_size || avctx->rc_initial_buffer_occupancy) {
+    if (!avctx->bit_rate)
+        if (avctx->rc_max_rate || avctx->rc_buffer_size || avctx->rc_initial_buffer_occupancy) {
             av_log( avctx, AV_LOG_ERROR, "Rate control parameters set without a bitrate\n");
             return AVERROR(EINVAL);
         }
@@ -648,6 +799,9 @@  static av_cold int vpx_init(AVCodecContext *avctx,
     if (avctx->bit_rate) {
         enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1, 1000,
                                                   AV_ROUND_NEAR_INF);
+#if CONFIG_LIBVPX_VP9_ENCODER
+        enccfg.ss_target_bitrate[0] = enccfg.rc_target_bitrate;
+#endif
     } else {
         // Set bitrate to default value. Also sets CRF to default if needed.
         set_vpx_defaults(avctx, &enccfg);
@@ -757,14 +911,11 @@  FF_ENABLE_DEPRECATION_WARNINGS
 
     enccfg.g_error_resilient = ctx->error_resilient || ctx->flags & VP8F_ERROR_RESILIENT;
 
-    if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8) {
-        AVDictionaryEntry* en = NULL;
-        while ((en = av_dict_get(ctx->vp8_ts_parameters, "", en, AV_DICT_IGNORE_SUFFIX))) {
-            if (vp8_ts_param_parse(&enccfg, en->key, en->value) < 0)
-                av_log(avctx, AV_LOG_WARNING,
-                       "Error parsing option '%s = %s'.\n",
-                       en->key, en->value);
-        }
+    while ((en = av_dict_get(ctx->vpx_ts_parameters, "", en, AV_DICT_IGNORE_SUFFIX))) {
+        if (vpx_ts_param_parse(ctx, &enccfg, en->key, en->value, avctx->codec_id) < 0)
+            av_log(avctx, AV_LOG_WARNING,
+                   "Error parsing option '%s = %s'.\n",
+                   en->key, en->value);
     }
 
     dump_enc_cfg(avctx, &enccfg);
@@ -774,7 +925,21 @@  FF_ENABLE_DEPRECATION_WARNINGS
         log_encoder_error(avctx, "Failed to initialize encoder");
         return AVERROR(EINVAL);
     }
-
+#if CONFIG_LIBVPX_VP9_ENCODER
+    if (avctx->codec_id == AV_CODEC_ID_VP9 && enccfg.ts_number_layers > 1) {
+        memset(&svc_params, 0, sizeof(svc_params));
+        for (int i = 0; i < enccfg.ts_number_layers; ++i) {
+            svc_params.max_quantizers[i] = enccfg.rc_max_quantizer;
+            svc_params.min_quantizers[i] = enccfg.rc_min_quantizer;
+        }
+        svc_params.scaling_factor_num[0] = enccfg.g_h;
+        svc_params.scaling_factor_den[0] = enccfg.g_h;
+#if VPX_ENCODER_ABI_VERSION >= 12
+        codecctl_int(avctx, VP9E_SET_SVC, 1);
+        codecctl_intp(avctx, VP9E_SET_SVC_PARAMETERS, (int *) &svc_params);
+#endif
+    }
+#endif
     if (ctx->is_alpha) {
         enccfg_alpha = enccfg;
         res = vpx_codec_enc_init(&ctx->encoder_alpha, iface, &enccfg_alpha, flags);
@@ -1321,6 +1486,9 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
     int64_t timestamp = 0;
     int res, coded_size;
     vpx_enc_frame_flags_t flags = 0;
+    const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
+    vpx_svc_layer_id_t layer_id;
+    int layer_id_valid = 0;
 
     if (frame) {
         const AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
@@ -1368,6 +1536,42 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
         }
     }
 
+    // this is for encoding with preset temporal layering patterns defined in
+    // set_temporal_layer_pattern function.
+    if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
+        if (flags & VPX_EFLAG_FORCE_KF) {
+            // keyframe, reset temporal layering.
+            ctx->current_temporal_idx = 0;
+            flags = VPX_EFLAG_FORCE_KF;
+        } else {
+            flags = 0;
+        }
+
+        /* get the flags from the temporal layer configuration. */
+        flags |= ctx->ts_layer_flags[ctx->current_temporal_idx];
+
+        memset(&layer_id, 0, sizeof(layer_id));
+#if VPX_ENCODER_ABI_VERSION >= 12
+        layer_id.spatial_layer_id = 0;
+#endif
+        layer_id.temporal_layer_id = enccfg->ts_layer_id[ctx->current_temporal_idx];
+#ifdef VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT
+        layer_id.temporal_layer_id_per_spatial[0] = layer_id.temporal_layer_id;
+#endif
+        layer_id_valid = 1;
+    }
+
+    if (layer_id_valid) {
+        if (avctx->codec_id == AV_CODEC_ID_VP8) {
+            codecctl_int(avctx, VP8E_SET_TEMPORAL_LAYER_ID, layer_id.temporal_layer_id);
+        }
+#if CONFIG_LIBVPX_VP9_ENCODER && VPX_ENCODER_ABI_VERSION >= 12
+        else if (avctx->codec_id == AV_CODEC_ID_VP9) {
+            codecctl_intp(avctx, VP9E_SET_SVC_LAYER_ID, (int *) &layer_id);
+        }
+#endif
+    }
+
     res = vpx_codec_encode(&ctx->encoder, rawimg, timestamp,
                            avctx->ticks_per_frame, flags, ctx->deadline);
     if (res != VPX_CODEC_OK) {
@@ -1397,6 +1601,8 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
         }
         av_base64_encode(avctx->stats_out, b64_size, ctx->twopass_stats.buf,
                          ctx->twopass_stats.sz);
+    } else if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
+        ctx->current_temporal_idx = (ctx->current_temporal_idx + 1) % enccfg->ts_periodicity;
     }
 
     *got_packet = !!coded_size;
@@ -1435,6 +1641,7 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
     { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE}, \
     { "undershoot-pct",  "Datarate undershoot (min) target (%)", OFFSET(rc_undershoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 100, VE }, \
     { "overshoot-pct",   "Datarate overshoot (max) target (%)", OFFSET(rc_overshoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, \
+    { "ts-parameters",   "Temporal scaling configuration using a :-separated list of key=value parameters", OFFSET(vpx_ts_parameters), AV_OPT_TYPE_DICT, {.str=NULL},  0,  0, VE}, \
 
 #define LEGACY_OPTIONS \
     {"speed", "", offsetof(VPxContext, cpu_used), AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE}, \
@@ -1454,8 +1661,6 @@  static const AVOption vp8_options[] = {
     { "auto-alt-ref",    "Enable use of alternate reference "
                          "frames (2-pass only)",                        OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1}, -1,  2, VE},
     { "cpu-used",        "Quality/Speed ratio modifier",                OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
-    { "ts-parameters",   "Temporal scaling configuration using a "
-                         ":-separated list of key=value parameters",    OFFSET(vp8_ts_parameters), AV_OPT_TYPE_DICT, {.str=NULL},  0,  0, VE},
     LEGACY_OPTIONS
     { NULL }
 };