diff mbox series

[FFmpeg-devel,v4] avcodec/libvpxenc: add VP9 temporal scalability encoding option

Message ID 20200114185911.223482-1-wonkap@google.com
State Superseded
Headers show
Series [FFmpeg-devel,v4] avcodec/libvpxenc: add VP9 temporal scalability encoding option | expand

Checks

Context Check Description
andriy/ffmpeg-patchwork success Make fate finished

Commit Message

Wonkap Jang Jan. 14, 2020, 6:59 p.m. UTC
This commit reuses the VP8 configuration options that enable
temporal scalability so that they also apply to VP9. It also adds a
way to select one of three preset temporal structures (refer to the
documentation for more details) that can be used in offline encoding.
---
 doc/encoders.texi      |  18 ++-
 libavcodec/libvpxenc.c | 252 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 244 insertions(+), 26 deletions(-)

Comments

James Zern Jan. 15, 2020, 4:47 a.m. UTC | #1
On Tue, Jan 14, 2020 at 11:07 AM Wonkap Jang
<wonkap-at-google.com@ffmpeg.org> wrote:
>
> This commit reuses the configuration options for VP8 that enables
> temporal scalability for VP9. It also adds a way to enable three
> preset temporal structures (refer to the documentation for more
> detail) that can be used in offline encoding.
> ---
>  doc/encoders.texi      |  18 ++-
>  libavcodec/libvpxenc.c | 252 +++++++++++++++++++++++++++++++++++++----
>  2 files changed, 244 insertions(+), 26 deletions(-)
>
> [...]
> @@ -221,10 +229,20 @@ static av_cold void dump_enc_cfg(AVCodecContext *avctx,
>             width, "rc_overshoot_pct:",  cfg->rc_overshoot_pct);
>      av_log(avctx, level, "temporal layering settings\n"
>             "  %*s%u\n", width, "ts_number_layers:", cfg->ts_number_layers);
> -    av_log(avctx, level,
> -           "\n  %*s", width, "ts_target_bitrate:");
> -    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> -        av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> +    if (avctx->codec_id == AV_CODEC_ID_VP8) {
> +        av_log(avctx, level,
> +            "\n  %*s", width, "ts_target_bitrate:");

align this with the '(' here and below.

> +        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> +            av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> +    }
> +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> +    if (avctx->codec_id == AV_CODEC_ID_VP9) {
> +        av_log(avctx, level,
> +            "\n  %*s", width, "layer_target_bitrate:");

trailing whitespace

> +        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> +            av_log(avctx, level, "%u ", cfg->layer_target_bitrate[i]);
> +    }
> +#endif
>      av_log(avctx, level, "\n");
>      av_log(avctx, level,
>             "\n  %*s", width, "ts_rate_decimator:");
> @@ -346,6 +364,8 @@ static av_cold int vpx_free(AVCodecContext *avctx)
>      }
>  #endif
>
> +    av_freep(&ctx->ts_layer_flags);
> +
>      vpx_codec_destroy(&ctx->encoder);
>      if (ctx->is_alpha) {
>          vpx_codec_destroy(&ctx->encoder_alpha);
> @@ -370,23 +390,153 @@ static void vp8_ts_parse_int_array(int *dest, char *value, size_t value_len, int
>      }
>  }
>
> -static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char *key, char *value)
> +static void set_temporal_layer_pattern(int layering_mode,
> +                                       vpx_codec_enc_cfg_t *cfg,
> +                                       int *layer_flags,
> +                                       int *flag_periodicity)
> +{
> +    switch (layering_mode) {
> +    case 2: {
> +        /**
> +         * 2-layers, 2-frame period.
> +         */
> +        int ids[2] = { 0, 1 };
> +        cfg->ts_periodicity = 2;
> +        *flag_periodicity = 2;
> +        cfg->ts_number_layers = 2;
> +        cfg->ts_rate_decimator[0] = 2;
> +        cfg->ts_rate_decimator[1] = 1;
> +        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> +        layer_flags[0] =
> +             VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> +             VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> +        layer_flags[1] =
> +            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF |
> +            VP8_EFLAG_NO_UPD_LAST |
> +            VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF;
> +        break;
> +    }
> +    case 3: {
> +        /**
> +         * 3-layers structure with one reference frame.
> +         *  This works same as temporal_layering_mode 3.
> +         *
> +         * 3-layers, 4-frame period.
> +         */
> +        int ids[4] = { 0, 2, 1, 2 };
> +        cfg->ts_periodicity = 4;
> +        *flag_periodicity = 4;
> +        cfg->ts_number_layers = 3;
> +        cfg->ts_rate_decimator[0] = 4;
> +        cfg->ts_rate_decimator[1] = 2;
> +        cfg->ts_rate_decimator[2] = 1;
> +        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> +
> +        /**
> +         * 0=L, 1=GF, 2=ARF,

my point with the earlier comment was that you document 3 indices, but
set 4. if [3] won't be used in this case then the assignment could be
removed.

> [...]
> +
> +static int vpx_ts_param_parse(VPxContext *ctx,
> +                              struct vpx_codec_enc_cfg *enccfg,
> +                              char *key, char *value,
> +                              const enum AVCodecID codec_id)

it's not common in this code base to mark non-pointer parameters
const. you can merge the first and second lines and the third and
fourth since they're not overly long.

> [...]
> +    if (ts_layering_mode) {
> +      // make sure the ts_layering_mode comes at the end of the ts_parameter string to ensure that
> +      // correct configuration is done.
> +      ctx->ts_layer_flags = av_malloc(sizeof(*ctx->ts_layer_flags) * VPX_TS_MAX_PERIODICITY);
> +      set_temporal_layer_pattern(ts_layering_mode, enccfg, ctx->ts_layer_flags, &enccfg->ts_periodicity);

indent is incorrect, it should be 4 spaces.
Wonkap Jang Jan. 16, 2020, 2:19 a.m. UTC | #2
Hi,
My comments are in-line.

On Tue, Jan 14, 2020 at 8:55 PM James Zern <jzern-at-google.com@ffmpeg.org>
wrote:

> On Tue, Jan 14, 2020 at 11:07 AM Wonkap Jang
> <wonkap-at-google.com@ffmpeg.org> wrote:
> >
> > This commit reuses the configuration options for VP8 that enables
> > temporal scalability for VP9. It also adds a way to enable three
> > preset temporal structures (refer to the documentation for more
> > detail) that can be used in offline encoding.
> > ---
> >  doc/encoders.texi      |  18 ++-
> >  libavcodec/libvpxenc.c | 252 +++++++++++++++++++++++++++++++++++++----
> >  2 files changed, 244 insertions(+), 26 deletions(-)
> >
> > [...]
> > @@ -221,10 +229,20 @@ static av_cold void dump_enc_cfg(AVCodecContext
> *avctx,
> >             width, "rc_overshoot_pct:",  cfg->rc_overshoot_pct);
> >      av_log(avctx, level, "temporal layering settings\n"
> >             "  %*s%u\n", width, "ts_number_layers:",
> cfg->ts_number_layers);
> > -    av_log(avctx, level,
> > -           "\n  %*s", width, "ts_target_bitrate:");
> > -    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> > -        av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> > +    if (avctx->codec_id == AV_CODEC_ID_VP8) {
> > +        av_log(avctx, level,
> > +            "\n  %*s", width, "ts_target_bitrate:");
>
> align this with the '(' here and below.
>
[WJ] I am not sure why it was sent this way, as this was all on one
line in my commit. Maybe a lint tool reformatted it?
I will make the change to work around that.

>
> > +        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> > +            av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
> > +    }
> > +#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
> > +    if (avctx->codec_id == AV_CODEC_ID_VP9) {
> > +        av_log(avctx, level,
> > +            "\n  %*s", width, "layer_target_bitrate:");
>
> trailing whitespace
>
[WJ] Will do.

>
> > +        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
> > +            av_log(avctx, level, "%u ", cfg->layer_target_bitrate[i]);
> > +    }
> > +#endif
> >      av_log(avctx, level, "\n");
> >      av_log(avctx, level,
> >             "\n  %*s", width, "ts_rate_decimator:");
> > @@ -346,6 +364,8 @@ static av_cold int vpx_free(AVCodecContext *avctx)
> >      }
> >  #endif
> >
> > +    av_freep(&ctx->ts_layer_flags);
> > +
> >      vpx_codec_destroy(&ctx->encoder);
> >      if (ctx->is_alpha) {
> >          vpx_codec_destroy(&ctx->encoder_alpha);
> > @@ -370,23 +390,153 @@ static void vp8_ts_parse_int_array(int *dest,
> char *value, size_t value_len, int
> >      }
> >  }
> >
> > -static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char
> *key, char *value)
> > +static void set_temporal_layer_pattern(int layering_mode,
> > +                                       vpx_codec_enc_cfg_t *cfg,
> > +                                       int *layer_flags,
> > +                                       int *flag_periodicity)
> > +{
> > +    switch (layering_mode) {
> > +    case 2: {
> > +        /**
> > +         * 2-layers, 2-frame period.
> > +         */
> > +        int ids[2] = { 0, 1 };
> > +        cfg->ts_periodicity = 2;
> > +        *flag_periodicity = 2;
> > +        cfg->ts_number_layers = 2;
> > +        cfg->ts_rate_decimator[0] = 2;
> > +        cfg->ts_rate_decimator[1] = 1;
> > +        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> > +
> > +        layer_flags[0] =
> > +             VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
> > +             VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
> > +        layer_flags[1] =
> > +            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF |
> > +            VP8_EFLAG_NO_UPD_LAST |
> > +            VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF;
> > +        break;
> > +    }
> > +    case 3: {
> > +        /**
> > +         * 3-layers structure with one reference frame.
> > +         *  This works same as temporal_layering_mode 3.
> > +         *
> > +         * 3-layers, 4-frame period.
> > +         */
> > +        int ids[4] = { 0, 2, 1, 2 };
> > +        cfg->ts_periodicity = 4;
> > +        *flag_periodicity = 4;
> > +        cfg->ts_number_layers = 3;
> > +        cfg->ts_rate_decimator[0] = 4;
> > +        cfg->ts_rate_decimator[1] = 2;
> > +        cfg->ts_rate_decimator[2] = 1;
> > +        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
> > +
> > +        /**
> > +         * 0=L, 1=GF, 2=ARF,
>
> my point with the earlier comment was that you document 3 indices, but
> set 4. if [3] won't be used in this case then the assignment could be
> removed.
>
[WJ] ids[] and layer_flags[] are indexed by the frame number within the
period (ts_periodicity),
so the index goes from 0 to 3 (period of 4), whereas ts_rate_decimator[] is
indexed by the temporal layer id itself (the values stored in ids[]), which
goes from 0 to 2. Am I missing something?


> > [...]
> > +
> > +static int vpx_ts_param_parse(VPxContext *ctx,
> > +                              struct vpx_codec_enc_cfg *enccfg,
> > +                              char *key, char *value,
> > +                              const enum AVCodecID codec_id)
>
> it's not common in this code base to mark non-pointer parameters
> const. you can merge the first and second lines and the third and
> fourth since they're not overly long.
>
[WJ] My mistake. I will make the changes.


> > [...]
> > +    if (ts_layering_mode) {
> > +      // make sure the ts_layering_mode comes at the end of the
> ts_parameter string to ensure that
> > +      // correct configuration is done.
> > +      ctx->ts_layer_flags = av_malloc(sizeof(*ctx->ts_layer_flags) *
> VPX_TS_MAX_PERIODICITY);
> > +      set_temporal_layer_pattern(ts_layering_mode, enccfg,
> ctx->ts_layer_flags, &enccfg->ts_periodicity);
>
> indent is incorrect, it should be 4 spaces.
>
[WJ] will make the change.

Thank you,

Wonkap
James Zern Jan. 17, 2020, 8:55 p.m. UTC | #3
On Wed, Jan 15, 2020 at 6:20 PM Wonkap Jang
<wonkap-at-google.com@ffmpeg.org> wrote:
>
> HI,
> My comments are in-line.
>
> On Tue, Jan 14, 2020 at 8:55 PM James Zern <jzern-at-google.com@ffmpeg.org>
> wrote:
>
> > On Tue, Jan 14, 2020 at 11:07 AM Wonkap Jang
> > <wonkap-at-google.com@ffmpeg.org> wrote:
> > > [...]
> > > +
> > > +        /**
> > > +         * 0=L, 1=GF, 2=ARF,
> >
> > my point with the earlier comment was that you document 3 indices, but
> > set 4. if [3] won't be used in this case then the assignment could be
> > removed.
> >
> [WJ] the ids[] and layer_flags[] are indexed up to the frame# in
> ts_periodicity..
> so that goes from 0-3 (period of 4), whereas ts_rate_decimator[] is indexed
> by the temporal layer id itself (ids[]) which goes from 0 to 2. Am I
> missing something?
>

Maybe I am. I took 0=L, 1=GF, 2=ARF to be comments about the indices
and was wondering why 3 wasn't documented or whether it was needed.
diff mbox series

Patch

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 61e674cf96..88429aed4c 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -1885,8 +1885,6 @@  Enable error resiliency features.
 Increase sharpness at the expense of lower PSNR.
 The valid range is [0, 7].
 
-@item VP8-specific options
-@table @option
 @item ts-parameters
 Sets the temporal scalability configuration using a :-separated list of
 key=value pairs. For example, to specify temporal scalability parameters
@@ -1894,7 +1892,7 @@  with @code{ffmpeg}:
 @example
 ffmpeg -i INPUT -c:v libvpx -ts-parameters ts_number_layers=3:\
 ts_target_bitrate=250,500,1000:ts_rate_decimator=4,2,1:\
-ts_periodicity=4:ts_layer_id=0,2,1,2 OUTPUT
+ts_periodicity=4:ts_layer_id=0,2,1,2:ts_layering_mode=3 OUTPUT
 @end example
 Below is a brief explanation of each of the parameters, please
 refer to @code{struct vpx_codec_enc_cfg} in @code{vpx/vpx_encoder.h} for more
@@ -1911,6 +1909,20 @@  Frame rate decimation factor for each temporal layer.
 Length of the sequence defining frame temporal layer membership.
 @item ts_layer_id
 Template defining the membership of frames to temporal layers.
+@item ts_layering_mode
+(optional) Selects the temporal structure from a set of pre-defined temporal layering modes.
+The following modes are currently supported.
+@table @option
+@item 0
+No temporal layering flags are provided internally;
+the encoder relies on flags passed in through AVFrame metadata.
+@item 2
+Two temporal layers. 0-1...
+@item 3
+Three temporal layers. 0-2-1-2...; with single reference frame.
+@item 4
+Same as option "3", except there is a dependency between
+the two temporal layer 2 frames within the temporal period.
 @end table
 @end table
 
diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 0b8a070304..c2e828180c 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -100,7 +100,9 @@  typedef struct VPxEncoderContext {
     int rc_undershoot_pct;
     int rc_overshoot_pct;
 
-    AVDictionary *vp8_ts_parameters;
+    AVDictionary *vpx_ts_parameters;
+    int *ts_layer_flags;
+    int current_temporal_idx;
 
     // VP9-only
     int lossless;
@@ -137,6 +139,7 @@  static const char *const ctlidstr[] = {
     [VP8E_SET_CQ_LEVEL]          = "VP8E_SET_CQ_LEVEL",
     [VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
     [VP8E_SET_SHARPNESS]               = "VP8E_SET_SHARPNESS",
+    [VP8E_SET_TEMPORAL_LAYER_ID]       = "VP8E_SET_TEMPORAL_LAYER_ID",
 #if CONFIG_LIBVPX_VP9_ENCODER
     [VP9E_SET_LOSSLESS]                = "VP9E_SET_LOSSLESS",
     [VP9E_SET_TILE_COLUMNS]            = "VP9E_SET_TILE_COLUMNS",
@@ -144,6 +147,11 @@  static const char *const ctlidstr[] = {
     [VP9E_SET_FRAME_PARALLEL_DECODING] = "VP9E_SET_FRAME_PARALLEL_DECODING",
     [VP9E_SET_AQ_MODE]                 = "VP9E_SET_AQ_MODE",
     [VP9E_SET_COLOR_SPACE]             = "VP9E_SET_COLOR_SPACE",
+    [VP9E_SET_SVC_LAYER_ID]            = "VP9E_SET_SVC_LAYER_ID",
+#if VPX_ENCODER_ABI_VERSION >= 12
+    [VP9E_SET_SVC_PARAMETERS]          = "VP9E_SET_SVC_PARAMETERS",
+#endif
+    [VP9E_SET_SVC]                     = "VP9E_SET_SVC",
 #if VPX_ENCODER_ABI_VERSION >= 11
     [VP9E_SET_COLOR_RANGE]             = "VP9E_SET_COLOR_RANGE",
 #endif
@@ -221,10 +229,20 @@  static av_cold void dump_enc_cfg(AVCodecContext *avctx,
            width, "rc_overshoot_pct:",  cfg->rc_overshoot_pct);
     av_log(avctx, level, "temporal layering settings\n"
            "  %*s%u\n", width, "ts_number_layers:", cfg->ts_number_layers);
-    av_log(avctx, level,
-           "\n  %*s", width, "ts_target_bitrate:");
-    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
-        av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
+    if (avctx->codec_id == AV_CODEC_ID_VP8) {
+        av_log(avctx, level,
+            "\n  %*s", width, "ts_target_bitrate:");
+        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+            av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
+    }
+#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
+    if (avctx->codec_id == AV_CODEC_ID_VP9) {
+        av_log(avctx, level,
+            "\n  %*s", width, "layer_target_bitrate:");  
+        for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+            av_log(avctx, level, "%u ", cfg->layer_target_bitrate[i]);
+    }
+#endif
     av_log(avctx, level, "\n");
     av_log(avctx, level,
            "\n  %*s", width, "ts_rate_decimator:");
@@ -346,6 +364,8 @@  static av_cold int vpx_free(AVCodecContext *avctx)
     }
 #endif
 
+    av_freep(&ctx->ts_layer_flags);
+
     vpx_codec_destroy(&ctx->encoder);
     if (ctx->is_alpha) {
         vpx_codec_destroy(&ctx->encoder_alpha);
@@ -370,23 +390,153 @@  static void vp8_ts_parse_int_array(int *dest, char *value, size_t value_len, int
     }
 }
 
-static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char *key, char *value)
+static void set_temporal_layer_pattern(int layering_mode,
+                                       vpx_codec_enc_cfg_t *cfg,
+                                       int *layer_flags,
+                                       int *flag_periodicity)
+{
+    switch (layering_mode) {
+    case 2: {
+        /**
+         * 2 layers, 2-frame period: frames alternate between layer ids 0 and 1.
+         */
+        int ids[2] = { 0, 1 };
+        cfg->ts_periodicity = 2;
+        *flag_periodicity = 2;
+        cfg->ts_number_layers = 2;
+        cfg->ts_rate_decimator[0] = 2;
+        cfg->ts_rate_decimator[1] = 1;
+        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+
+        layer_flags[0] =
+             VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+             VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        layer_flags[1] =
+            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_LAST |
+            VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF;
+        break;
+    }
+    case 3: {
+        /**
+         * 3-layer structure with one reference frame.
+         * This works the same as temporal_layering_mode 3.
+         *
+         * 3 layers, 4-frame period (layer ids 0, 2, 1, 2).
+         */
+        int ids[4] = { 0, 2, 1, 2 };
+        cfg->ts_periodicity = 4;
+        *flag_periodicity = 4;
+        cfg->ts_number_layers = 3;
+        cfg->ts_rate_decimator[0] = 4;
+        cfg->ts_rate_decimator[1] = 2;
+        cfg->ts_rate_decimator[2] = 1;
+        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+
+        /**
+         * 0=L, 1=GF, 2=ARF label the LAST/GF/ARF reference buffers, not layer_flags[] indices;
+         * layer_flags[i] applies to frame i of the period. Intra-layer prediction disabled.
+         */
+        layer_flags[0] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        layer_flags[1] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_ARF;
+        layer_flags[2] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+        layer_flags[3] =
+            VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_ARF;
+        break;
+    }
+    case 4: {
+        /**
+         * 3-layer structure.
+         * Adds a dependency between the two TL2 frames (on top of case 3).
+         * 3 layers, 4-frame period (layer ids 0, 2, 1, 2).
+         */
+        int ids[4] = { 0, 2, 1, 2 };
+        cfg->ts_periodicity = 4;
+        *flag_periodicity = 4;
+        cfg->ts_number_layers = 3;
+        cfg->ts_rate_decimator[0] = 4;
+        cfg->ts_rate_decimator[1] = 2;
+        cfg->ts_rate_decimator[2] = 1;
+        memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+
+        /**
+         * 0=L, 1=GF, 2=ARF (reference buffers, not layer_flags[] indices). Intra-layer prediction disabled.
+         */
+        layer_flags[0] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        layer_flags[1] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+        layer_flags[2] =
+            VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+            VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+        layer_flags[3] =
+            VP8_EFLAG_NO_REF_LAST |
+            VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+            VP8_EFLAG_NO_UPD_ARF;
+        break;
+    }
+    default:
+        /**
+         * Unknown mode: leave cfg, layer_flags and flag_periodicity untouched;
+         * the caller may be supplying per-frame flags externally.
+         */
+        break;
+
+    }
+}
+
+/* Apply one key=value pair of the ts-parameters dictionary to enccfg (and to ctx for preset modes).
+ * Returns 0 on success (unknown keys are ignored), a negative value on an empty value or allocation failure. */
+static int vpx_ts_param_parse(VPxContext *ctx, struct vpx_codec_enc_cfg *enccfg,
+                              char *key, char *value, enum AVCodecID codec_id)
 {
     size_t value_len = strlen(value);
+    int ts_layering_mode = 0;
 
     if (!value_len)
         return -1;
 
     if (!strcmp(key, "ts_number_layers"))
         enccfg->ts_number_layers = strtoul(value, &value, 10);
-    else if (!strcmp(key, "ts_target_bitrate"))
-        vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
-    else if (!strcmp(key, "ts_rate_decimator"))
-      vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len, VPX_TS_MAX_LAYERS);
-    else if (!strcmp(key, "ts_periodicity"))
+    else if (!strcmp(key, "ts_target_bitrate")) {
+        if (codec_id == AV_CODEC_ID_VP8)
+            vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
+#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
+        if (codec_id == AV_CODEC_ID_VP9)
+            vp8_ts_parse_int_array(enccfg->layer_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
+#endif
+    } else if (!strcmp(key, "ts_rate_decimator")) {
+        vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len, VPX_TS_MAX_LAYERS);
+    } else if (!strcmp(key, "ts_periodicity")) {
         enccfg->ts_periodicity = strtoul(value, &value, 10);
-    else if (!strcmp(key, "ts_layer_id"))
+    } else if (!strcmp(key, "ts_layer_id")) {
         vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len, VPX_TS_MAX_PERIODICITY);
+    } else if (!strcmp(key, "ts_layering_mode")) {
+        /* Preset from set_temporal_layer_pattern(); pass this key last, it overrides the other ts_* values. */
+        ts_layering_mode = strtoul(value, &value, 10); /* decimal: base 4 would reject mode "4" */
+    }
+
+#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
+    enccfg->temporal_layering_mode = 1; // only bypass mode is supported for now.
+    enccfg->ss_number_layers = 1; // TODO: add spatial scalability support.
+#endif
+    if (ts_layering_mode) {
+        av_freep(&ctx->ts_layer_flags); /* a repeated ts_layering_mode key must not leak; zeroed so unknown modes read 0 */
+        if (!(ctx->ts_layer_flags = av_mallocz(sizeof(*ctx->ts_layer_flags) * VPX_TS_MAX_PERIODICITY)))
+            return AVERROR(ENOMEM);
+        set_temporal_layer_pattern(ts_layering_mode, enccfg, ctx->ts_layer_flags, &enccfg->ts_periodicity);
+    }
 
     return 0;
 }
@@ -590,7 +740,9 @@  static av_cold int vpx_init(AVCodecContext *avctx,
     vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
 #if CONFIG_LIBVPX_VP9_ENCODER
     vpx_codec_caps_t codec_caps = vpx_codec_get_caps(iface);
+    vpx_svc_extra_cfg_t svc_params;
 #endif
+    AVDictionaryEntry* en = NULL;
 
     av_log(avctx, AV_LOG_INFO, "%s\n", vpx_codec_version_str());
     av_log(avctx, AV_LOG_VERBOSE, "%s\n", vpx_codec_build_config());
@@ -648,6 +800,9 @@  static av_cold int vpx_init(AVCodecContext *avctx,
     if (avctx->bit_rate) {
         enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1, 1000,
                                                   AV_ROUND_NEAR_INF);
+#if CONFIG_LIBVPX_VP9_ENCODER
+        enccfg.ss_target_bitrate[0] = enccfg.rc_target_bitrate;
+#endif
     } else {
         // Set bitrate to default value. Also sets CRF to default if needed.
         set_vpx_defaults(avctx, &enccfg);
@@ -757,14 +912,11 @@  FF_ENABLE_DEPRECATION_WARNINGS
 
     enccfg.g_error_resilient = ctx->error_resilient || ctx->flags & VP8F_ERROR_RESILIENT;
 
-    if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8) {
-        AVDictionaryEntry* en = NULL;
-        while ((en = av_dict_get(ctx->vp8_ts_parameters, "", en, AV_DICT_IGNORE_SUFFIX))) {
-            if (vp8_ts_param_parse(&enccfg, en->key, en->value) < 0)
-                av_log(avctx, AV_LOG_WARNING,
-                       "Error parsing option '%s = %s'.\n",
-                       en->key, en->value);
-        }
+    while ((en = av_dict_get(ctx->vpx_ts_parameters, "", en, AV_DICT_IGNORE_SUFFIX))) {
+        if (vpx_ts_param_parse(ctx, &enccfg, en->key, en->value, avctx->codec_id) < 0)
+            av_log(avctx, AV_LOG_WARNING,
+                   "Error parsing option '%s = %s'.\n",
+                   en->key, en->value);
     }
 
     dump_enc_cfg(avctx, &enccfg);
@@ -774,7 +926,21 @@  FF_ENABLE_DEPRECATION_WARNINGS
         log_encoder_error(avctx, "Failed to initialize encoder");
         return AVERROR(EINVAL);
     }
-
+#if CONFIG_LIBVPX_VP9_ENCODER
+    if (avctx->codec_id == AV_CODEC_ID_VP9 && enccfg.ts_number_layers > 1) {
+        memset(&svc_params, 0, sizeof(svc_params));
+        for (int i = 0; i < enccfg.ts_number_layers; ++i) {
+            svc_params.max_quantizers[i] = enccfg.rc_max_quantizer;
+            svc_params.min_quantizers[i] = enccfg.rc_min_quantizer;
+        }
+        svc_params.scaling_factor_num[0] = enccfg.g_h;
+        svc_params.scaling_factor_den[0] = enccfg.g_h;
+#if VPX_ENCODER_ABI_VERSION >= 12
+        codecctl_int(avctx, VP9E_SET_SVC, 1);
+        codecctl_intp(avctx, VP9E_SET_SVC_PARAMETERS, (int *)&svc_params);
+#endif
+    }
+#endif
     if (ctx->is_alpha) {
         enccfg_alpha = enccfg;
         res = vpx_codec_enc_init(&ctx->encoder_alpha, iface, &enccfg_alpha, flags);
@@ -1321,6 +1487,9 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
     int64_t timestamp = 0;
     int res, coded_size;
     vpx_enc_frame_flags_t flags = 0;
+    const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
+    vpx_svc_layer_id_t layer_id;
+    int layer_id_valid = 0;
 
     if (frame) {
         const AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
@@ -1368,6 +1537,42 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
         }
     }
 
+    // this is for encoding with preset temporal layering patterns defined in
+    // set_temporal_layer_pattern function.
+    if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
+        if (flags & VPX_EFLAG_FORCE_KF) {
+            // keyframe, reset temporal layering.
+            ctx->current_temporal_idx = 0;
+            flags = VPX_EFLAG_FORCE_KF;
+        } else {
+            flags = 0;
+        }
+
+        /* get the flags from the temporal layer configuration. */
+        flags |= ctx->ts_layer_flags[ctx->current_temporal_idx];
+
+        memset(&layer_id, 0, sizeof(layer_id));
+#if VPX_ENCODER_ABI_VERSION >= 12
+        layer_id.spatial_layer_id = 0;
+#endif
+        layer_id.temporal_layer_id = enccfg->ts_layer_id[ctx->current_temporal_idx];
+#ifdef VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT
+        layer_id.temporal_layer_id_per_spatial[0] = layer_id.temporal_layer_id;
+#endif
+        layer_id_valid = 1;
+    }
+
+    if (layer_id_valid) {
+        if (avctx->codec_id == AV_CODEC_ID_VP8) {
+            codecctl_int(avctx, VP8E_SET_TEMPORAL_LAYER_ID, layer_id.temporal_layer_id);
+        }
+#if CONFIG_LIBVPX_VP9_ENCODER && VPX_ENCODER_ABI_VERSION >= 12
+        else if (avctx->codec_id == AV_CODEC_ID_VP9) {
+            codecctl_intp(avctx, VP9E_SET_SVC_LAYER_ID, (int *)&layer_id);
+        }
+#endif
+    }
+
     res = vpx_codec_encode(&ctx->encoder, rawimg, timestamp,
                            avctx->ticks_per_frame, flags, ctx->deadline);
     if (res != VPX_CODEC_OK) {
@@ -1397,6 +1602,8 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
         }
         av_base64_encode(avctx->stats_out, b64_size, ctx->twopass_stats.buf,
                          ctx->twopass_stats.sz);
+    } else if (enccfg->ts_number_layers > 1 && ctx->ts_layer_flags) {
+        ctx->current_temporal_idx = (ctx->current_temporal_idx + 1) % enccfg->ts_periodicity;
     }
 
     *got_packet = !!coded_size;
@@ -1435,6 +1642,7 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
     { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE}, \
     { "undershoot-pct",  "Datarate undershoot (min) target (%)", OFFSET(rc_undershoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 100, VE }, \
     { "overshoot-pct",   "Datarate overshoot (max) target (%)", OFFSET(rc_overshoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, \
+    { "ts-parameters",   "Temporal scaling configuration using a :-separated list of key=value parameters", OFFSET(vpx_ts_parameters), AV_OPT_TYPE_DICT, {.str=NULL},  0,  0, VE}, \
 
 #define LEGACY_OPTIONS \
     {"speed", "", offsetof(VPxContext, cpu_used), AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE}, \
@@ -1454,8 +1662,6 @@  static const AVOption vp8_options[] = {
     { "auto-alt-ref",    "Enable use of alternate reference "
                          "frames (2-pass only)",                        OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1}, -1,  2, VE},
     { "cpu-used",        "Quality/Speed ratio modifier",                OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
-    { "ts-parameters",   "Temporal scaling configuration using a "
-                         ":-separated list of key=value parameters",    OFFSET(vp8_ts_parameters), AV_OPT_TYPE_DICT, {.str=NULL},  0,  0, VE},
     LEGACY_OPTIONS
     { NULL }
 };